Code
# Clean working environment
rm(list = ls())This supplementary file contains the R code for processing and analysing the raw data, and creating figures for the manuscript - Do females and males differ in organ scaling? A phylogenetic multilevel analysis and its implications for toxicology research.
Both the data subset used in this study and the phylogenetic tree are part of a database on organ sizes, which is available here: https://felixpleiva.github.io/organ_size_DB/.
# Clean working environment
rm(list = ls())dat <- read.csv("../outputs/data_for_analyses.csv") %>%
rename(
sex = sex_individual,
life_stage = life_stage_individual,
original_category = trait_size_category,
body_size = body_size_mean,
organ_size = organ_size_mean,
phylo = species_underscored
) %>%
mutate(organ_grouped = recode(organ_grouped,
pituitary_gland = "pituitary_glands",
lung = "lungs",
kidney = "kidneys")) %>%
mutate(log10_body_size = log10(body_size),
log10_organ_size = log10(organ_size))glimpse(dat)Rows: 478
Columns: 23
$ study_id <chr> "rayyan-150591802", "rayyan-150591802", "rayyan-1505…
$ phylum <chr> "Chordata", "Chordata", "Chordata", "Chordata", "Cho…
$ class <chr> "Actinopterygii", "Actinopterygii", "Mammalia", "Mam…
$ order <chr> "Anguilliformes", "Anguilliformes", "Primates", "Pri…
$ family <chr> "Anguillidae", "Anguillidae", "Cercopithecidae", "Ce…
$ genus <chr> "Anguilla", "Anguilla", "Chlorocebus", "Chlorocebus"…
$ species <chr> "Anguilla anguilla", "Anguilla anguilla", "Chloroceb…
$ species_reported <chr> "Anguilla anguilla", "Anguilla anguilla", "Chloroceb…
$ phylo <chr> "Anguilla_anguilla", "Anguilla_anguilla", "Chloroceb…
$ habitat <chr> "freshwater", "freshwater", "terrestrial", "terrestr…
$ life_stage <chr> NA, NA, "adult", "adult", "adult", "adult", "adult",…
$ sex <chr> "female", "male", "female", "male", "female", "male"…
$ organ_side <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ original_category <chr> "liver", "liver", "brain", "brain", "heart", "heart"…
$ system <chr> "digestive", "digestive", "nervous", "nervous", "cir…
$ functional_roles <chr> "resource exchange and consumption", "resource excha…
$ organ_grouped <chr> "liver", "liver", "brain", "brain", "heart", "heart"…
$ id_cluster <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
$ body_size <dbl> 684, 71, 5430, 7830, 5430, 7830, 5430, 7830, 5430, 7…
$ organ_size <dbl> 9.30, 0.80, 70.82, 77.42, 25.22, 50.87, 114.31, 149.…
$ is_paired_organ <chr> "no", "no", "no", "no", "no", "no", "no", "no", "no"…
$ log10_body_size <dbl> 2.8, 1.9, 3.7, 3.9, 3.7, 3.9, 3.7, 3.9, 3.7, 3.9, 3.…
$ log10_organ_size <dbl> 0.968, -0.097, 1.850, 1.889, 1.402, 1.706, 2.058, 2.…
columns_to_factor <- c(
"study_id",
"phylum",
"class",
"order",
"family",
"genus",
"species",
"species_reported",
"phylo",
"habitat",
"life_stage",
"sex",
"organ_side",
"original_category",
"system",
"functional_roles",
"organ_grouped",
"id_cluster",
"is_paired_organ"
)columns_to_numeric <- c(
"body_size",
"organ_size",
"log10_organ_size",
"log10_body_size"
)# Apply the conversions to the dataset.
dat <- dat %>%
mutate(
across(all_of(columns_to_factor), as.factor),
across(all_of(columns_to_numeric), as.numeric)
)
# Confirm that changes have been applied correctly
glimpse(dat)Rows: 478
Columns: 23
$ study_id <fct> rayyan-150591802, rayyan-150591802, rayyan-150591976…
$ phylum <fct> Chordata, Chordata, Chordata, Chordata, Chordata, Ch…
$ class <fct> Actinopterygii, Actinopterygii, Mammalia, Mammalia, …
$ order <fct> Anguilliformes, Anguilliformes, Primates, Primates, …
$ family <fct> Anguillidae, Anguillidae, Cercopithecidae, Cercopith…
$ genus <fct> Anguilla, Anguilla, Chlorocebus, Chlorocebus, Chloro…
$ species <fct> Anguilla anguilla, Anguilla anguilla, Chlorocebus sa…
$ species_reported <fct> Anguilla anguilla, Anguilla anguilla, Chlorocebus ae…
$ phylo <fct> Anguilla_anguilla, Anguilla_anguilla, Chlorocebus_sa…
$ habitat <fct> freshwater, freshwater, terrestrial, terrestrial, te…
$ life_stage <fct> NA, NA, adult, adult, adult, adult, adult, adult, ad…
$ sex <fct> female, male, female, male, female, male, female, ma…
$ organ_side <fct> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ original_category <fct> liver, liver, brain, brain, heart, heart, liver, liv…
$ system <fct> digestive, digestive, nervous, nervous, circulatory,…
$ functional_roles <fct> resource exchange and consumption, resource exchange…
$ organ_grouped <fct> liver, liver, brain, brain, heart, heart, liver, liv…
$ id_cluster <fct> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
$ body_size <dbl> 684, 71, 5430, 7830, 5430, 7830, 5430, 7830, 5430, 7…
$ organ_size <dbl> 9.30, 0.80, 70.82, 77.42, 25.22, 50.87, 114.31, 149.…
$ is_paired_organ <fct> no, no, no, no, no, no, no, no, no, no, no, no, no, …
$ log10_body_size <dbl> 2.8, 1.9, 3.7, 3.9, 3.7, 3.9, 3.7, 3.9, 3.7, 3.9, 3.…
$ log10_organ_size <dbl> 0.968, -0.097, 1.850, 1.889, 1.402, 1.706, 2.058, 2.…
The phylogenetic tree includes all 363 species for which organ size data are available. Our next step is to prune the tree to retain only those species for which organ size measurements exist for both males and females within the same study. This pruning is important because our photogenically informed analyses require the calculation of a variance-covariance matrix based on the shared evolutionary history of the species. The tree was obtained from the Open Tree of Life and therefore lacks branch lengths. We will estimate branch lengths using Grafen’s method, which assigns branch lengths proportionally based on node heights within the tree structure. This approach is widely used when empirical branch lengths are unavailable but a phylogenetic topology is known.
tree <- read.tree("../data/Phylogenetic tree for 363 species.tre")
# Check for database/tree species mismatches
setdiff(tree$tip.label, dat$phylo) # Tree species absent in database [1] "Apodemus_sylvaticus" "Apodemus_flavicollis"
[3] "Rhabdomys_pumilio" "Notomys_alexis"
[5] "Gerbillus_perpallidus" "Acomys_minous"
[7] "Abrothrix_longipilis" "Abrothrix_olivaceus"
[9] "Peromyscus_maniculatus" "Peromyscus_leucopus"
[11] "Phodopus_sungorus" "Mesocricetus_auratus"
[13] "Myodes_glareolus" "Microtus_pinetorum"
[15] "Microtus_pennsylvanicus" "Microtus_montanus"
[17] "Microtus_arvalis" "Microtus_agrestis"
[19] "Arvicola_amphibius" "Ondatra_zibethicus"
[21] "Jaculus_jaculus" "Liomys_salvini"
[23] "Dipodomys_merriami" "Dipodomys_spectabilis"
[25] "Castor_fiber" "Octodon_degus"
[27] "Cuniculus_paca" "Dasyprocta_punctata"
[29] "Dasyprocta_azarae" "Galea_musteloides"
[31] "Dolichotis_patagonum" "Hydrochoerus_hydrochaeris"
[33] "Hystrix_indica" "Marmota_monax"
[35] "Ictidomys_tridecemlineatus" "Tamias_striatus"
[37] "Glaucomys_volans" "Sciurus_vulgaris"
[39] "Sciurus_niger" "Sciurus_carolinensis"
[41] "Tamiasciurus_hudsonicus" "Glis_glis"
[43] "Oryctolagus_cuniculus" "Sylvilagus_floridanus"
[45] "Lepus_europaeus" "Tupaia_glis"
[47] "Nasalis_larvatus" "Trachypithecus_obscurus"
[49] "Trachypithecus_cristatus" "Trachypithecus_francoisi"
[51] "Trachypithecus_vetulus" "Trachypithecus_pileatus"
[53] "Semnopithecus_entellus" "Presbytis_melalophos_mitrata"
[55] "Colobus_angolensis" "Colobus_guereza"
[57] "Piliocolobus_badius" "Chlorocebus_pygerythrus"
[59] "Mandrillus_sphinx" "Papio_hamadryas"
[61] "Papio_anubis" "Theropithecus_gelada"
[63] "Macaca_nigra" "Macaca_arctoides"
[65] "Macaca_mulatta" "Homo_sapiens"
[67] "Gorilla_gorilla" "Symphalangus_syndactylus"
[69] "Nomascus_concolor" "Saguinus_geoffroyi"
[71] "Saguinus_oedipus" "Saguinus_leucopus"
[73] "Saguinus_labiatus" "Saguinus_mystax"
[75] "Saguinus_inustus" "Saguinus_fuscicollis_nigrifrons"
[77] "Saguinus_fuscicollis_fuscicollis" "Saguinus_nigricollis"
[79] "Callithrix_geoffroyi" "Callithrix_penicillata"
[81] "Callithrix_jacchus" "Callithrix_pygmaea"
[83] "Mico_argentatus" "Callimico_goeldii"
[85] "Leontopithecus_chrysomelas" "Leontopithecus_rosalia"
[87] "Aotus_trivirgatus" "Saimiri_boliviensis"
[89] "Saimiri_sciureus_macrodon" "Cebus_albifrons"
[91] "Sapajus_apella" "Lagothrix_poeppigii"
[93] "Lagothrix_lagotricha" "Ateles_geoffroyi"
[95] "Ateles_paniscus" "Ateles_belzebuth_chamek"
[97] "Alouatta_juara" "Alouatta_caraya"
[99] "Alouatta_seniculus_seniculus" "Alouatta_sara"
[101] "Cacajao_calvus" "Cacajao_melanocephalus"
[103] "Cacajao_rubicundus" "Pithecia_monachus"
[105] "Plecturocebus_cupreus" "Plecturocebus_moloch"
[107] "Cheirogaleus_medius" "Eulemur_fulvus_fulvus"
[109] "Eulemur_macaco" "Lemur_catta"
[111] "Varecia_rubra" "Pudu_puda"
[113] "Capreolus_capreolus" "Gazella_gazella"
[115] "Hexaprotodon_liberiensis" "Sus_scrofa"
[117] "Babyrousa_babyrussa" "Pecari_tajacu"
[119] "Tayassu_pecari" "Lasionycteris_noctivagans"
[121] "Nyctalus_noctula" "Lasiurus_borealis"
[123] "Tadarida_brasiliensis" "Phyllostomus_hastatus"
[125] "Pteropus_lylei" "Cynopterus_brachyotis"
[127] "Martes_pennanti" "Martes_foina"
[129] "Martes_martes" "Martes_zibellina"
[131] "Gulo_gulo" "Aonyx_cinereus"
[133] "Lutrogale_perspicillata" "Lutra_lutra"
[135] "Lontra_canadensis" "Mustela_erminea"
[137] "Mustela_nivalis" "Mustela_nigripes"
[139] "Mustela_putorius" "Potos_flavus"
[141] "Mephitis_mephitis" "Phoca_groenlandica"
[143] "Cystophora_cristata" "Zalophus_californianus"
[145] "Arctocephalus_pusillus" "Canis_lupus"
[147] "Lycaon_pictus" "Cuon_alpinus"
[149] "Canis_latrans" "Canis_aureus"
[151] "Vulpes_vulpes" "Vulpes_corsac"
[153] "Helogale_parvula" "Mungos_mungo"
[155] "Proteles_cristata" "Chrotogale_owstoni"
[157] "Arctictis_binturong" "Panthera_tigris"
[159] "Panthera_leo" "Felis_chaus"
[161] "Felis_nigripes" "Felis_silvestris"
[163] "Felis_catus" "Prionailurus_viverrinus"
[165] "Prionailurus_bengalensis" "Otocolobus_manul"
[167] "Puma_yagouaroundi" "Puma_concolor"
[169] "Lynx_rufus" "Lynx_canadensis"
[171] "Lynx_lynx" "Leopardus_colocolo"
[173] "Leopardus_tigrinus" "Leopardus_geoffroyi"
[175] "Leopardus_wiedii" "Leopardus_pardalis"
[177] "Leptailurus_serval" "Profelis_aurata"
[179] "Caracal_caracal" "Pardofelis_marmorata"
[181] "Crocidura_russula" "Suncus_murinus"
[183] "Blarina_brevicauda" "Sorex_minutus"
[185] "Sorex_araneus" "Sorex_cinereus"
[187] "Sorex_minutissimus" "Talpa_europaea"
[189] "Scalopus_aquaticus" "Erinaceus_europaeus"
[191] "Procavia_capensis" "Elephas_maximus"
[193] "Loxodonta_africana" "Echinops_telfairi"
[195] "Setifer_setosus" "Hemicentetes_semispinosus"
[197] "Tenrec_ecaudatus" "Rhynchocyon_petersi"
[199] "Dasypus_novemcinctus" "Tamandua_tetradactyla"
[201] "Myrmecophaga_tridactyla" "Choloepus_didactylus"
[203] "Bettongia_penicillata" "Potorous_tridactylus"
[205] "Macropus_fuliginosus" "Notamacropus_agilis"
[207] "Setonix_brachyurus" "Trichosurus_vulpecula"
[209] "Dasyurus_viverrinus" "Macrotis_lagotis"
[211] "Dromiciops_gliroides" "Monodelphis_domestica"
[213] "Thylamys_elegans" "Didelphis_virginiana"
[215] "Tachyglossus_aculeatus" "Sturnus_sericeus"
[217] "Sturnus_vulgaris" "Amadina_fasciata"
[219] "Lonchura_punctulata" "Lonchura_striata"
[221] "Taeniopygia_guttata" "Cardinalis_cardinalis"
[223] "Zonotrichia_leucophrys" "Passer_domesticus"
[225] "Sylvia_atricapilla" "Pycnonotus_sinensis"
[227] "Tyrannus_tyrannus" "Melopsittacus_undulatus"
[229] "Nymphicus_hollandicus" "Haliaeetus_albicilla"
[231] "Pandion_haliaetus" "Calidris_ruficollis"
[233] "Calidris_ferruginea" "Calidris_minuta"
[235] "Calidris_alba" "Calidris_alpina"
[237] "Calidris_maritima" "Calidris_tenuirostris"
[239] "Calidris_canutus" "Arenaria_interpres"
[241] "Tringa_totanus" "Limosa_lapponica"
[243] "Limosa_limosa" "Numenius_phaeopus"
[245] "Numenius_arquata" "Larus_glaucescens"
[247] "Larus_californicus" "Rissa_tridactyla"
[249] "Cerorhinca_monocerata" "Pluvialis_squatarola"
[251] "Pluvialis_apricaria" "Haematopus_ostralegus"
[253] "Charadrius_alexandrinus" "Charadrius_hiaticula"
[255] "Podiceps_cristatus" "Eudyptes_chrysolophus"
[257] "Pygoscelis_adeliae" "Pygoscelis_antarcticus"
[259] "Streptopelia_decaocto" "Zenaida_asiatica"
[261] "Sephanoides_sephanoides" "Meleagris_gallopavo"
[263] "Phasianus_colchicus" "Alectoris_chukar"
[265] "Coturnix_coturnix" "Alopochen_aegyptiaca"
[267] "Somateria_mollissima" "Clangula_hyemalis"
[269] "Branta_leucopsis" "Anser_cygnoides"
[271] "Struthio_camelus" "Alligator_mississippiensis"
[273] "Apalone_spinifera" "Trachemys_scripta"
[275] "Graptemys_geographica" "Pseudemys_concinna"
[277] "Chrysemys_dorsalis" "Terrapene_carolina_triunguis"
[279] "Sternotherus_odoratus" "Kinosternon_subrubrum"
[281] "Chelydra_serpentina" "Python_bivittatus"
[283] "Nidirana_pleuraden" "Rana_pipiens"
[285] "Crossodactylus_schmidti" "Xenopus_laevis"
[287] "Plethodon_glutinosus" "Plethodon_cylindraceus"
[289] "Plethodon_montanus" "Plethodon_metcalfi"
[291] "Plethodon_cinereus" "Plethodon_vandykei"
[293] "Plethodon_idahoensis" "Plethodon_vehiculum"
[295] "Plethodon_dunni" "Carassius_auratus"
[297] "Silurus_meridionalis" "Bryconamericus_iheringii"
[299] "Oncorhynchus_mykiss" "Salvelinus_namaycush"
[301] "Salvelinus_alpinus" "Salmo_salar"
[303] "Salmo_trutta" "Paralichthys_olivaceus"
[305] "Dicentrarchus_labrax" "Scatophagus_argus"
[307] "Parachaenichthys_charcoti" "Notothenia_rossii"
[309] "Notothenia_neglecta" "Gobionotothen_gibberifrons"
[311] "Perca_flavescens" "Macquaria_australasica"
[313] "Aplodactylus_punctatus" "Thunnus_orientalis"
[315] "Coryphopterus_glaucofraenum" "Gadus_morhua"
[317] "Atlantoraja_cyclophora" "Malacoraja_senta"
[319] "Hypnos_monopterygius" "Cetorhinus_maximus"
[321] "Orthoperus_atomus" "Sericoderus_lateralis"
[323] "Staphylinus_caesareus" "Primorskiella_anodonta"
[325] "Porophila_mystacea" "Nanosella_russica"
[327] "Acrotrichis_montandonii" "Acrotrichis_grandicollis"
[329] "Trichogramma_evanescens" "Megaphragma_mymaripenne"
[331] "Anaphes_flavipes" "Liposcelis_bostrychophila"
[333] "Heliothrips_haemorrhoidalis" "Lepisma_saccharina"
setdiff(dat$phylo, tree$tip.label) # Database species absent in treecharacter(0)
# Exclude mismatched species for consistency
tree <- keep.tip(tree, intersect(tree$tip.label, dat$phylo))
dat<- dat[dat$phylo %in% tree$tip.label, ]
# Re-check for mismatches after pruning
setdiff(tree$tip.label, dat$phylo)character(0)
setdiff(dat$phylo, tree$tip.label)character(0)
# If not, lets calculate them usang the Grafen's method
tree <- compute.brlen(tree, method = "Grafen")
is.ultrametric(tree)[1] TRUE
# Compute phylogenetic covariance matrix (Brownian motion)
A <- ape::vcv.phylo(tree)Now the tree match the species in the dataframe, we will create a figure of the phylogenetic tree containing the 29 species for which we have organ size data for both males and females. We will add a plot indicating whether each organ was measured (green) or not (grey).
# Remove underscores from organ names
dat$organ_grouped <- gsub("_", " ", dat$organ_grouped)
summary_data <- dat %>%
count(phylo, organ_grouped) %>% # Count occurrences by species and organ
mutate(presence = if_else(n > 0, "Yes", "No")) %>% # Convert counts to "Yes"/"No"
select(-n) %>% # Remove the original count column
pivot_wider(
names_from = organ_grouped, # Each organ becomes a column
values_from = presence, # Values are now "Yes" or "No"
values_fill = list(presence = "No") # Fill missing combinations with "No"
)
summary_data <- summary_data[summary_data$phylo %in% tree$tip.label, ]
# Align data and tree and convert to matrix format for gheatmap
datF <- summary_data %>%
column_to_rownames("phylo") %>%
as.matrix()# Make Figure 1
# Create phylogeny tree
plot_tree <- ggtree(tree, layout = "rectangular", branch.length = "none") +
geom_tiplab(aes(label = paste0("italic('", label, "')")),
size = 2.5, align = TRUE, parse = TRUE)
Figure_1 <- gheatmap(
plot_tree,
datF,
width = 1,
offset = 5,
colnames_angle = 45,
colnames_offset_y = 30,
font.size = 2.5,
hjust = 0
) +
scale_fill_manual(values = c("grey90", "#009E73"), name = "") +
theme(
legend.position = "none",
plot.margin = margin(t = 35, r = 10, b = 0, l = 0)
) +
coord_cartesian(clip = "off")
Figure_1# Save Figure 1
ggsave('../outputs/Figure_1.pdf', Figure_1, width = 7, height = 6)
ggsave('../outputs/Figure_1.png', Figure_1, width = 7, height = 6, dpi = 1200)As shown in the plot above, not all organs were measured across all species. The most frequently measured organ, with the highest number of observations, is the brain (16 species), followed by the liver (14 species), heart (12 species), and spleen (11 species), each measured in more than five species. Other organs, such as muscle, were recorded only once.
dat %>%
count(organ_grouped, species, class) %>%
group_by(organ_grouped, class) %>%
summarise(
n_species = n_distinct(species),
n_studies = sum(n),
.groups = 'drop'
) %>%
arrange(desc(n_species))# A tibble: 29 × 4
organ_grouped class n_species n_studies
<chr> <fct> <int> <int>
1 brain Aves 10 20
2 heart Mammalia 7 34
3 kidneys Mammalia 7 40
4 liver Mammalia 7 48
5 lungs Mammalia 7 30
6 pituitary glands Mammalia 7 14
7 spleen Mammalia 7 36
8 brain Mammalia 6 24
9 adrenal glands Mammalia 5 22
10 liver Aves 5 32
# ℹ 19 more rows
dat %>%
distinct(study_id, species, organ_grouped) %>%
group_by(study_id) %>%
summarise(
Species_studied = paste(sort(unique(species)), collapse = ", "),
Organs_studied = paste(sort(unique(organ_grouped)), collapse = ", "),
.groups = "drop"
) %>%
arrange(study_id) %>%
kable(align = c("l", "l", "l"),
full_width = FALSE,
table.attr = 'width="100%"') %>%
column_spec(1, width = "15%") %>%
column_spec(2, width = "42.5%") %>%
column_spec(3, width = "42.5%")| study_id | Species_studied | Organs_studied |
|---|---|---|
| rayyan-150591802 | Anguilla anguilla | liver |
| rayyan-150591976 | Chlorocebus sabaeus | adrenal glands, brain, heart, kidneys, liver, lungs, pancreas, pituitary glands, spleen, thymus |
| rayyan-150591986 | Macaca fascicularis | brain, heart, kidneys, liver, lungs, pancreas, pituitary glands, spleen, thymus |
| rayyan-150591999 | Gallus gallus | liver, pancreas, small intestine, spleen, stomach |
| rayyan-150592010 | Rattus norvegicus | heart, kidneys, liver, lungs, spleen |
| rayyan-150592028 | Mus musculus | heart, kidneys, liver, lungs, pancreas |
| rayyan-150592046 | Gallus gallus | liver, spleen |
| rayyan-150592058 | Saimiri sciureus sciureus | adrenal glands, brain, heart, kidneys, liver, lungs, pancreas, pituitary glands, spleen |
| rayyan-150592095 | Rattus norvegicus | adrenal glands, brain, heart, kidneys, liver, lungs, spleen |
| rayyan-150592101 | Anas acuta, Aythya fuligula, Gavia arctica, Lagopus lagopus, Lagopus muta, Melanitta nigra, Pica pica, Plectrophenax nivalis, Tetrao urogallus, Tringa glareola | brain |
| rayyan-150592161 | Ficedula hypoleuca | liver, spleen |
| rayyan-150592185 | Macaca fascicularis | adrenal glands, kidneys, liver, spleen, thymus |
| rayyan-150592208 | Mus musculus, Rattus norvegicus | liver, lungs, spleen |
| rayyan-150592224 | Rattus norvegicus | heart, kidneys, liver, lungs, spleen |
| rayyan-150592233 | Mus musculus | heart, kidneys, liver, lungs, spleen |
| rayyan-150592252 | Rattus norvegicus | brain, heart, kidneys, liver, spleen |
| rayyan-150592278 | Pteropus alecto, Pteropus poliocephalus, Pteropus scapulatus | pituitary glands |
| rayyan-150592321 | Rattus norvegicus | adrenal glands, brain, heart, kidneys, liver, lungs, spleen, thymus |
| rayyan-150592363 | Rattus norvegicus | adrenal glands, brain, heart, kidneys, liver, spleen, thymus |
| rayyan-150592365 | Rattus norvegicus | adrenal glands, brain, heart, kidneys, liver, pituitary glands, spleen, thymus |
| rayyan-150592376 | Rattus norvegicus | kidneys, liver, spleen |
| rayyan-150592477 | Rattus norvegicus | adrenal glands, brain, heart, kidneys, liver, spleen, thymus |
| rayyan-150592513 | Anas platyrhynchos | adrenal glands, heart, kidneys, large intestine, liver, pancreas, spleen, stomach |
| rayyan-150592533 | Coturnix japonica | adrenal glands, heart, liver, spleen, stomach |
| rayyan-150592594 | Mus musculus | heart, large intestine, liver, lungs, small intestine, stomach |
| rayyan-150592597 | Phrynocephalus vlangalii | heart, lungs, stomach |
| rayyan-150594227 | Poecilia reticulata | heart |
| rayyan-150594231 | Lasiopodomys brandtii, Meriones unguiculatus | brain, heart, kidneys, large intestine, liver, lungs, small intestine, spleen, stomach |
| rayyan-150594290 | Mus musculus | adrenal glands, kidneys, liver |
| rayyan-150594616 | Coturnix japonica | heart, liver, stomach |
| rayyan-150639932 | Numida meleagris | heart, liver, stomach |
| rayyan-150640225 | Oreochromis niloticus | liver |
| rayyan-150726157 | Saimiri sciureus sciureus | brain |
| rayyan-150726166 | Anas platyrhynchos | muscle |
Since our objective is to account for the evolutionary history of the species, it is essential to recognise that, in a comparative context, species cannot be treated as independent units of observation. Closely related species tend to exhibit similar values for a given trait due to shared ancestry. Although no strict rule exists for the minimum number of species required, Garland and Adolph’s paper Why Not to Do Two-Species Comparative Studies: Limitations on Inferring Adaptation’ discusses relevant limitations and recommends broader sampling. For simplicity, we evaluated the effects of accounting for shared evolutionary history in organs measured across more than two species, and compared these results with non-phylogenetic models. This rule of thumb naturally excluded analyses of organs represented by a single species, such as muscle. For this particular organ, only a simplified version of the Bayesian model was applied.
# Filter organ of interest
dat_organ <- dat %>%
group_by(organ_grouped, sex) %>%
filter(n_distinct(species) >= 5) %>%
ungroup() %>%
filter(organ_grouped == "liver")
# Check for database/tree species mismatches
setdiff(tree$tip.label, dat_organ$phylo) # Tree species absent in database [1] "Pteropus_scapulatus" "Pteropus_poliocephalus"
[3] "Pteropus_alecto" "Plectrophenax_nivalis"
[5] "Pica_pica" "Tringa_glareola"
[7] "Gavia_arctica" "Lagopus_muta"
[9] "Lagopus_lagopus" "Tetrao_urogallus"
[11] "Aythya_fuligula" "Anas_acuta"
[13] "Melanitta_nigra" "Phrynocephalus_vlangalii"
[15] "Poecilia_reticulata"
setdiff(dat_organ$phylo, tree$tip.label) # Database species absent in treecharacter(0)
# Exclude mismatched species for consistency
tree_organ <- keep.tip(tree, intersect(tree$tip.label, dat_organ$phylo))
dat_organ <- dat_organ[dat_organ$phylo %in% tree_organ$tip.label, ]
# Re-check for mismatches after pruning
setdiff(tree_organ$tip.label, dat_organ$phylo)character(0)
setdiff(dat_organ$phylo, tree_organ$tip.label)character(0)
# Calculate branch lengths using Grafen's method
tree_organ <- compute.brlen(tree_organ, method = "Grafen")
is.ultrametric(tree_organ)[1] TRUE
# Compute phylogenetic covariance matrix
A_organ <- ape::vcv.phylo(tree_organ)model_simple_liver <- brm(
log10_organ_size ~
1 +
log10_body_size +
(1 + log10_body_size | sex),
data = dat_organ,
family = gaussian(),
prior = c(
prior(normal(0.75, 0.5), class = "b", coef = "log10_body_size"),
prior(normal(0, 2), class = "Intercept"),
prior(student_t(3, 0, 0.5), class = "sd"),
prior(student_t(3, 0, 1), class = "sigma")
),
sample_prior = TRUE,
chains = 4,
cores = 4,
threads = threading(2),
iter = 4000,
warmup = 2000,
control = list(adapt_delta = 0.95),
save_pars = save_pars(all = TRUE),
silent = TRUE, refresh = 0
)Running MCMC with 4 parallel chains, with 2 thread(s) per chain...
Chain 2 finished in 3.3 seconds.
Chain 3 finished in 3.4 seconds.
Chain 1 finished in 4.3 seconds.
Chain 4 finished in 4.4 seconds.
All 4 chains finished successfully.
Mean chain execution time: 3.8 seconds.
Total execution time: 4.5 seconds.
pp_check(model_simple_liver)summary(model_simple_liver) Family: gaussian
Links: mu = identity; sigma = identity
Formula: log10_organ_size ~ 1 + log10_body_size + (1 + log10_body_size | sex)
Data: dat_organ (Number of observations: 86)
Draws: 4 chains, each with iter = 4000; warmup = 2000; thin = 1;
total post-warmup draws = 8000
Multilevel Hyperparameters:
~sex (Number of levels: 2)
Estimate Est.Error l-95% CI u-95% CI Rhat
sd(Intercept) 0.21 0.24 0.00 0.85 1.00
sd(log10_body_size) 0.14 0.17 0.00 0.59 1.00
cor(Intercept,log10_body_size) -0.09 0.62 -0.98 0.96 1.00
Bulk_ESS Tail_ESS
sd(Intercept) 2127 3351
sd(log10_body_size) 1317 1308
cor(Intercept,log10_body_size) 3727 4070
Regression Coefficients:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
Intercept -1.33 0.20 -1.79 -0.91 1.00 2326 2220
log10_body_size 0.88 0.11 0.59 1.12 1.00 1235 613
Further Distributional Parameters:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sigma 0.17 0.01 0.14 0.20 1.00 5658 4998
Draws were sampled using sample(hmc). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).
I will next extract, for each sex, the estimated slope and intercept, together with the 95% credible intervals.
# Extract combined coefficients (fixed + random)
coef_sex <-
coef(model_simple_liver)$sex %>%
as_tibble(rownames = "sex") %>%
select(sex, matches("Estimate|Q2.5|Q97.5")) %>%
transmute(
sex,
slope = Estimate.log10_body_size,
slope_low = Q2.5.log10_body_size,
slope_high = Q97.5.log10_body_size,
)
coef_sex# A tibble: 2 × 4
sex slope slope_low slope_high
<chr> <dbl> <dbl> <dbl>
1 female 0.893 0.839 0.944
2 male 0.897 0.846 0.948
Lets now visualise these estimates
# Create new data with sequences for each sex
new_data <- dat_organ %>%
group_by(sex) %>%
summarise(
min_body = min(log10_body_size),
max_body = max(log10_body_size),
.groups = "drop"
) %>%
rowwise() %>%
mutate(
log10_body_size = list(seq(min_body, max_body, length.out = 100))
) %>%
unnest(log10_body_size) %>%
select(sex, log10_body_size)
# Generate posterior predicted draws
draws_by_sex <- epred_draws(
model_simple_liver,
newdata = new_data,
re_formula = ~ (1 + log10_body_size | sex)
)# Prepare labels
label_pos <- dat_organ %>%
group_by(sex) %>%
summarise(
x_lab = 1.1,
y_lab = 2.2,
.groups = "drop"
)
# Build the label text
slopes_labels <- coef_sex %>%
left_join(label_pos, by = "sex") %>%
mutate(
label = sprintf("β = %.2f [%.2f, %.2f]", slope, slope_low, slope_high)
)
# Plot points and predicted lines with credible intervals
ggplot() +
geom_point(
data = dat_organ,
aes(x = log10_body_size, y = log10_organ_size),
shape = 21, colour = "black", fill = "grey",
alpha = 0.5, size = 3
) +
stat_lineribbon(
data = draws_by_sex,
aes(x = log10_body_size, y = .epred, fill = stat(.width)),
alpha = 0.5, colour = NA
) +
geom_text(
data = slopes_labels,
aes(x = 1.1, y = 2.2, label = label),
hjust = 0, vjust = 1, size = 4, font = 2
) +
scale_fill_distiller(palette = "Purples", direction = 1) +
labs(
x = expression(log[10]*" Body size"),
y = expression(log[10]*" Liver size"),
title = "Liver size ~ Body size + (1 + Body size | Sex)"
) +
facet_wrap(vars(sex), nrow = 1) +
theme_bw() +
theme(
plot.title = element_text(size = 14),
strip.text = element_text(size = 12),
axis.title.x = element_text(size = 14),
axis.title.y = element_text(size = 14),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12),
legend.position = "none"
)model_phylo_liver <- brm(
log10_organ_size ~
1 +
log10_body_size +
(1 + log10_body_size | sex) +
(1 | gr(phylo, cov = A_organ)),
data = dat_organ,
data2 = list(A_organ = A_organ),
family = gaussian(),
prior = c(
prior(normal(0.75, 0.5), class = "b", coef = "log10_body_size"),
prior(normal(0, 2), class = "Intercept"),
prior(student_t(3, 0, 0.5), class = "sd"),
prior(student_t(3, 0, 1), class = "sigma")
),
sample_prior = TRUE,
chains = 4,
cores = 4,
threads = threading(2),
iter = 4000,
warmup = 2000,
control = list(adapt_delta = 0.95),
save_pars = save_pars(all = TRUE),
silent = TRUE, refresh = 0
)Running MCMC with 4 parallel chains, with 2 thread(s) per chain...
Chain 3 finished in 9.6 seconds.
Chain 4 finished in 11.1 seconds.
Chain 2 finished in 15.0 seconds.
Chain 1 finished in 16.8 seconds.
All 4 chains finished successfully.
Mean chain execution time: 13.1 seconds.
Total execution time: 16.9 seconds.
pp_check(model_phylo_liver)summary(model_phylo_liver) Family: gaussian
Links: mu = identity; sigma = identity
Formula: log10_organ_size ~ 1 + log10_body_size + (1 + log10_body_size | sex) + (1 | gr(phylo, cov = A_organ))
Data: dat_organ (Number of observations: 86)
Draws: 4 chains, each with iter = 4000; warmup = 2000; thin = 1;
total post-warmup draws = 8000
Multilevel Hyperparameters:
~phylo (Number of levels: 14)
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sd(Intercept) 0.15 0.04 0.09 0.25 1.00 2863 4071
~sex (Number of levels: 2)
Estimate Est.Error l-95% CI u-95% CI Rhat
sd(Intercept) 0.19 0.24 0.00 0.86 1.00
sd(log10_body_size) 0.13 0.17 0.00 0.60 1.00
cor(Intercept,log10_body_size) -0.04 0.62 -0.98 0.96 1.00
Bulk_ESS Tail_ESS
sd(Intercept) 3023 3698
sd(log10_body_size) 1811 3615
cor(Intercept,log10_body_size) 4394 4388
Regression Coefficients:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
Intercept -1.39 0.22 -1.85 -0.96 1.00 2974 2582
log10_body_size 0.86 0.11 0.57 1.07 1.00 1943 1452
Further Distributional Parameters:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sigma 0.09 0.01 0.07 0.10 1.00 7103 5270
Draws were sampled using sample(hmc). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).
hyp <- paste(
"sd_phylo__Intercept^2 /",
"(sd_phylo__Intercept^2 + sd_sex__Intercept^2 + sigma^2) = 0"
)
(hyp <- hypothesis(model_phylo_liver, hyp, class = NULL))Hypothesis Tests for class :
Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio
1 (sd_phylo__Interc... = 0 0.47 0.26 0.02 0.86 0.18
Post.Prob Star
1 0.15 *
---
'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
'*': For one-sided hypotheses, the posterior probability exceeds 95%;
for two-sided hypotheses, the value tested against lies outside the 95%-CI.
Posterior probabilities of point hypotheses assume equal prior probabilities.
lambda_liver <- hyp$hypothesis$EstimateI will next extract, for each sex, the estimated slope and intercept, together with the 95% credible intervals.
# Extract combined coefficients (fixed + random)
coef_sex <-
coef(model_phylo_liver)$sex %>%
as_tibble(rownames = "sex") %>%
select(sex, matches("Estimate|Q2.5|Q97.5")) %>%
transmute(
sex,
slope = Estimate.log10_body_size,
slope_low = Q2.5.log10_body_size,
slope_high = Q97.5.log10_body_size,
)
coef_sex# A tibble: 2 × 4
sex slope slope_low slope_high
<chr> <dbl> <dbl> <dbl>
1 female 0.879 0.822 0.937
2 male 0.875 0.819 0.931
Lets now visualise these estimates
# Create new data with sequences for each sex
new_data <- dat_organ %>%
group_by(sex) %>%
summarise(
min_body = min(log10_body_size),
max_body = max(log10_body_size),
.groups = "drop"
) %>%
rowwise() %>%
mutate(
log10_body_size = list(seq(min_body, max_body, length.out = 100))
) %>%
unnest(log10_body_size) %>%
select(sex, log10_body_size)
# Generate posterior predicted draws
draws_by_sex <- epred_draws(
model_phylo_liver,
newdata = new_data,
re_formula = ~ (1 + log10_body_size | sex)
)# Prepare labels
label_pos <- dat_organ %>%
group_by(sex) %>%
summarise(
x_lab = 1.1,
y_lab = 2.2,
.groups = "drop"
)
# Build the label text
slopes_labels <- coef_sex %>%
left_join(label_pos, by = "sex") %>%
mutate(
label = sprintf("β = %.2f [%.2f, %.2f]", slope, slope_low, slope_high)
)
# Plot points and predicted lines with credible intervals
ggplot() +
geom_point(
data = dat_organ,
aes(x = log10_body_size, y = log10_organ_size),
shape = 21, colour = "black", fill = "grey",
alpha = 0.5, size = 3
) +
stat_lineribbon(
data = draws_by_sex,
aes(x = log10_body_size, y = .epred, fill = stat(.width)),
alpha = 0.5, colour = NA
) +
geom_text(
data = slopes_labels,
aes(x = 1.1, y = 2.2, label = label),
hjust = 0, vjust = 1, size = 4, font = 2
) +
scale_fill_distiller(palette = "Purples", direction = 1) +
labs(
x = expression(log[10]*" Body size"),
y = expression(log[10]*" Liver size"),
title = "Liver size ~ Body size + (1 + Body size | Sex) + (1 | Phylogeny)"
) +
facet_wrap(vars(sex), nrow = 1) +
theme_bw() +
theme(
plot.title = element_text(size = 14),
strip.text = element_text(size = 12),
axis.title.x = element_text(size = 14),
axis.title.y = element_text(size = 14),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12),
legend.position = "none"
)loo_simple <- loo(model_simple_liver)
loo_phylo <- loo(model_phylo_liver)
loo_compare(loo_simple, loo_phylo) elpd_diff se_diff
model_phylo_liver 0.0 0.0
model_simple_liver -50.9 7.2
In this comparison, the model that accounts for phylogeny is clearly preferable to the model that treats species as independent observational units. The phylogenetic model (model_phylo_liver) is used as the reference with elpd_diff = 0, whereas the simpler model (model_simple_liver) has an expected log predictive density (ELPD) that is about 50.9 units lower, with an associated standard error of 7.2. In the context of leave‑one‑out cross‑validation, differences exceeding roughly four to five times their standard error are typically regarded as strong evidence (Vehtari et al., 2016; Sivula et al., 2025). Here, 50.9/7.2≈7, indicating a very large difference relative to its uncertainty, and thus providing strong support for the phylogenetic model.
# Filter organ of interest
dat_organ <- dat %>%
group_by(organ_grouped, sex) %>%
filter(n_distinct(species) >= 5) %>%
ungroup() %>%
filter(organ_grouped == "brain")
# Check for database/tree species mismatches
setdiff(tree$tip.label, dat_organ$phylo) # Tree species absent in database [1] "Mus_musculus" "Pteropus_scapulatus"
[3] "Pteropus_poliocephalus" "Pteropus_alecto"
[5] "Ficedula_hypoleuca" "Gallus_gallus"
[7] "Coturnix_japonica" "Numida_meleagris"
[9] "Anas_platyrhynchos" "Phrynocephalus_vlangalii"
[11] "Oreochromis_niloticus" "Poecilia_reticulata"
[13] "Anguilla_anguilla"
setdiff(dat_organ$phylo, tree$tip.label) # Database species absent in treecharacter(0)
# Exclude mismatched species for consistency
tree_organ <- keep.tip(tree, intersect(tree$tip.label, dat_organ$phylo))
dat_organ <- dat_organ[dat_organ$phylo %in% tree_organ$tip.label, ]
# Re-check for mismatches after pruning
setdiff(tree_organ$tip.label, dat_organ$phylo)character(0)
setdiff(dat_organ$phylo, tree_organ$tip.label)character(0)
# Calculate branch lengths using Grafen's method
tree_organ <- compute.brlen(tree_organ, method = "Grafen")
is.ultrametric(tree_organ)[1] TRUE
# Compute phylogenetic covariance matrix
A_organ <- ape::vcv.phylo(tree_organ)model_simple_brain <- brm(
log10_organ_size ~
1 +
log10_body_size +
(1 + log10_body_size | sex),
data = dat_organ,
family = gaussian(),
prior = c(
prior(normal(0.75, 0.5), class = "b", coef = "log10_body_size"),
prior(normal(0, 2), class = "Intercept"),
prior(student_t(3, 0, 0.5), class = "sd"),
prior(student_t(3, 0, 1), class = "sigma")
),
sample_prior = TRUE,
chains = 4,
cores = 4,
threads = threading(2),
iter = 4000,
warmup = 2000,
control = list(adapt_delta = 0.95),
save_pars = save_pars(all = TRUE),
silent = TRUE, refresh = 0
)Running MCMC with 4 parallel chains, with 2 thread(s) per chain...
Chain 1 finished in 1.8 seconds.
Chain 3 finished in 1.8 seconds.
Chain 4 finished in 1.8 seconds.
Chain 2 finished in 2.0 seconds.
All 4 chains finished successfully.
Mean chain execution time: 1.8 seconds.
Total execution time: 2.1 seconds.
pp_check(model_simple_brain)summary(model_simple_brain) Family: gaussian
Links: mu = identity; sigma = identity
Formula: log10_organ_size ~ 1 + log10_body_size + (1 + log10_body_size | sex)
Data: dat_organ (Number of observations: 44)
Draws: 4 chains, each with iter = 4000; warmup = 2000; thin = 1;
total post-warmup draws = 8000
Multilevel Hyperparameters:
~sex (Number of levels: 2)
Estimate Est.Error l-95% CI u-95% CI Rhat
sd(Intercept) 0.29 0.31 0.01 1.10 1.00
sd(log10_body_size) 0.18 0.20 0.00 0.73 1.00
cor(Intercept,log10_body_size) -0.11 0.59 -0.97 0.93 1.00
Bulk_ESS Tail_ESS
sd(Intercept) 2884 2777
sd(log10_body_size) 1879 1561
cor(Intercept,log10_body_size) 3878 3854
Regression Coefficients:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
Intercept -1.57 0.33 -2.23 -0.93 1.00 4724 4133
log10_body_size 0.81 0.15 0.48 1.10 1.00 2138 1851
Further Distributional Parameters:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sigma 0.31 0.04 0.25 0.39 1.00 6068 4884
Draws were sampled using sample(hmc). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).
I will next extract, for each sex, the estimated slope and intercept, together with the 95% credible intervals.
# Extract combined coefficients (fixed + random)
coef_sex <-
coef(model_simple_brain)$sex %>%
as_tibble(rownames = "sex") %>%
select(sex, matches("Estimate|Q2.5|Q97.5")) %>%
transmute(
sex,
slope = Estimate.log10_body_size,
slope_low = Q2.5.log10_body_size,
slope_high = Q97.5.log10_body_size,
)
coef_sex# A tibble: 2 × 4
sex slope slope_low slope_high
<chr> <dbl> <dbl> <dbl>
1 female 0.826 0.650 1.00
2 male 0.806 0.635 0.975
Lets now visualise these estimates
# Create new data with sequences for each sex
new_data <- dat_organ %>%
group_by(sex) %>%
summarise(
min_body = min(log10_body_size),
max_body = max(log10_body_size),
.groups = "drop"
) %>%
rowwise() %>%
mutate(
log10_body_size = list(seq(min_body, max_body, length.out = 100))
) %>%
unnest(log10_body_size) %>%
select(sex, log10_body_size)
# Generate posterior predicted draws
draws_by_sex <- epred_draws(
model_simple_brain,
newdata = new_data,
re_formula = ~ (1 + log10_body_size | sex)
)# Prepare labels
label_pos <- dat_organ %>%
group_by(sex) %>%
summarise(
x_lab = 1.1,
y_lab = 2.2,
.groups = "drop"
)
# Build the label text
slopes_labels <- coef_sex %>%
left_join(label_pos, by = "sex") %>%
mutate(
label = sprintf("β = %.2f [%.2f, %.2f]", slope, slope_low, slope_high)
)
# Plot points and predicted lines with credible intervals
ggplot() +
geom_point(
data = dat_organ,
aes(x = log10_body_size, y = log10_organ_size),
shape = 21, colour = "black", fill = "grey",
alpha = 0.5, size = 3
) +
stat_lineribbon(
data = draws_by_sex,
aes(x = log10_body_size, y = .epred, fill = stat(.width)),
alpha = 0.5, colour = NA
) +
geom_text(
data = slopes_labels,
aes(x = 1.1, y = 2.2, label = label),
hjust = 0, vjust = 1, size = 4, font = 2
) +
scale_fill_distiller(palette = "Purples", direction = 1) +
labs(
x = expression(log[10]*" Body size"),
y = expression(log[10]*" Brain size"),
title = "Brain size ~ Body size + (1 + Body size | Sex)"
) +
facet_wrap(vars(sex), nrow = 1) +
theme_bw() +
theme(
plot.title = element_text(size = 14),
strip.text = element_text(size = 12),
axis.title.x = element_text(size = 14),
axis.title.y = element_text(size = 14),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12),
legend.position = "none"
)model_phylo_brain <- brm(
log10_organ_size ~
1 +
log10_body_size +
(1 + log10_body_size | sex) +
(1 | gr(phylo, cov = A_organ)),
data = dat_organ,
data2 = list(A_organ = A_organ),
family = gaussian(),
prior = c(
prior(normal(0.75, 0.5), class = "b", coef = "log10_body_size"),
prior(normal(0, 2), class = "Intercept"),
prior(student_t(3, 0, 0.5), class = "sd"),
prior(student_t(3, 0, 1), class = "sigma")
),
sample_prior = TRUE,
chains = 4,
cores = 4,
threads = threading(2),
iter = 4000,
warmup = 2000,
control = list(adapt_delta = 0.95),
save_pars = save_pars(all = TRUE),
silent = TRUE, refresh = 0
)Running MCMC with 4 parallel chains, with 2 thread(s) per chain...
Chain 2 finished in 18.8 seconds.
Chain 4 finished in 19.3 seconds.
Chain 3 finished in 19.6 seconds.
Chain 1 finished in 20.1 seconds.
All 4 chains finished successfully.
Mean chain execution time: 19.5 seconds.
Total execution time: 20.2 seconds.
pp_check(model_phylo_brain)summary(model_phylo_brain) Family: gaussian
Links: mu = identity; sigma = identity
Formula: log10_organ_size ~ 1 + log10_body_size + (1 + log10_body_size | sex) + (1 | gr(phylo, cov = A_organ))
Data: dat_organ (Number of observations: 44)
Draws: 4 chains, each with iter = 4000; warmup = 2000; thin = 1;
total post-warmup draws = 8000
Multilevel Hyperparameters:
~phylo (Number of levels: 16)
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sd(Intercept) 0.96 0.18 0.66 1.38 1.00 759 1670
~sex (Number of levels: 2)
Estimate Est.Error l-95% CI u-95% CI Rhat
sd(Intercept) 0.19 0.25 0.00 0.85 1.00
sd(log10_body_size) 0.12 0.16 0.00 0.56 1.00
cor(Intercept,log10_body_size) -0.02 0.64 -0.98 0.97 1.00
Bulk_ESS Tail_ESS
sd(Intercept) 1635 504
sd(log10_body_size) 982 452
cor(Intercept,log10_body_size) 2762 3983
Regression Coefficients:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
Intercept 0.33 0.63 -0.87 1.59 1.00 684 1315
log10_body_size 0.12 0.11 -0.07 0.38 1.00 1522 1531
Further Distributional Parameters:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sigma 0.03 0.00 0.02 0.04 1.00 1278 440
Draws were sampled using sample(hmc). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).
hyp <- paste(
"sd_phylo__Intercept^2 /",
"(sd_phylo__Intercept^2 + sd_sex__Intercept^2 + sigma^2) = 0"
)
(hyp <- hypothesis(model_phylo_brain, hyp, class = NULL))Hypothesis Tests for class :
Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio
1 (sd_phylo__Interc... = 0 0.93 0.13 0.52 1 0
Post.Prob Star
1 0 *
---
'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
'*': For one-sided hypotheses, the posterior probability exceeds 95%;
for two-sided hypotheses, the value tested against lies outside the 95%-CI.
Posterior probabilities of point hypotheses assume equal prior probabilities.
lambda_brain <- hyp$hypothesis$EstimateI will next extract, for each sex, the estimated slope and intercept, together with the 95% credible intervals.
# Extract combined coefficients (fixed + random)
coef_sex <-
coef(model_phylo_brain)$sex %>%
as_tibble(rownames = "sex") %>%
select(sex, matches("Estimate|Q2.5|Q97.5")) %>%
transmute(
sex,
slope = Estimate.log10_body_size,
slope_low = Q2.5.log10_body_size,
slope_high = Q97.5.log10_body_size,
)
coef_sex# A tibble: 2 × 4
sex slope slope_low slope_high
<chr> <dbl> <dbl> <dbl>
1 female 0.0989 -0.00190 0.228
2 male 0.104 0.0109 0.226
Lets now visualise these estimates
# Create new data with sequences for each sex
new_data <- dat_organ %>%
group_by(sex) %>%
summarise(
min_body = min(log10_body_size),
max_body = max(log10_body_size),
.groups = "drop"
) %>%
rowwise() %>%
mutate(
log10_body_size = list(seq(min_body, max_body, length.out = 100))
) %>%
unnest(log10_body_size) %>%
select(sex, log10_body_size)
# Generate posterior predicted draws
draws_by_sex <- epred_draws(
model_phylo_brain,
newdata = new_data,
re_formula = ~ (1 + log10_body_size | sex)
)# Prepare labels
label_pos <- dat_organ %>%
group_by(sex) %>%
summarise(
x_lab = 1.1,
y_lab = 2.2,
.groups = "drop"
)
# Build the label text
slopes_labels <- coef_sex %>%
left_join(label_pos, by = "sex") %>%
mutate(
label = sprintf("β = %.2f [%.2f, %.2f]", slope, slope_low, slope_high)
)
# Plot points and predicted lines with credible intervals
ggplot() +
geom_point(
data = dat_organ,
aes(x = log10_body_size, y = log10_organ_size),
shape = 21, colour = "black", fill = "grey",
alpha = 0.5, size = 3
) +
stat_lineribbon(
data = draws_by_sex,
aes(x = log10_body_size, y = .epred, fill = stat(.width)),
alpha = 0.5, colour = NA
) +
geom_text(
data = slopes_labels,
aes(x = 1.1, y = 2.2, label = label),
hjust = 0, vjust = 1, size = 4, font = 2
) +
scale_fill_distiller(palette = "Purples", direction = 1) +
labs(
x = expression(log[10]*" Body size"),
y = expression(log[10]*" Brain size"),
title = "Brain size ~ Body size + (1 + Body size | Sex) + (1 | Phylogeny)"
) +
facet_wrap(vars(sex), nrow = 1) +
theme_bw() +
theme(
plot.title = element_text(size = 14),
strip.text = element_text(size = 12),
axis.title.x = element_text(size = 14),
axis.title.y = element_text(size = 14),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12),
legend.position = "none"
)loo_simple <- loo(model_simple_brain)
loo_phylo <- loo(model_phylo_brain)
loo_compare(loo_simple, loo_phylo) elpd_diff se_diff
model_phylo_brain 0.0 0.0
model_simple_brain -99.0 5.7
In this comparison, the model that accounts for phylogeny is clearly preferable to the model that treats species as independent observational units. The phylogenetic model (model_phylo_brain) is used as the reference with elpd_diff = 0, whereas the simpler model (model_simple_brain) has an expected log predictive density (ELPD) that is about 99.5 units lower, with an associated standard error of 5.5. In the context of leave‑one‑out cross‑validation, differences exceeding roughly four times their standard error are typically regarded as strong evidence. Here, 99.5/5.5.2≈18, indicating a very large difference relative to its uncertainty, and thus providing strong support for the phylogenetic model.
# Filter organ of interest
dat_organ <- dat %>%
group_by(organ_grouped, sex) %>%
filter(n_distinct(species) >= 5) %>%
ungroup() %>%
filter(organ_grouped == "heart")
# Check for database/tree species mismatches
setdiff(tree$tip.label, dat_organ$phylo) # Tree species absent in database [1] "Pteropus_scapulatus" "Pteropus_poliocephalus" "Pteropus_alecto"
[4] "Ficedula_hypoleuca" "Plectrophenax_nivalis" "Pica_pica"
[7] "Tringa_glareola" "Gavia_arctica" "Lagopus_muta"
[10] "Lagopus_lagopus" "Tetrao_urogallus" "Gallus_gallus"
[13] "Aythya_fuligula" "Anas_acuta" "Melanitta_nigra"
[16] "Oreochromis_niloticus" "Anguilla_anguilla"
setdiff(dat_organ$phylo, tree$tip.label) # Database species absent in treecharacter(0)
# Exclude mismatched species for consistency
tree_organ <- keep.tip(tree, intersect(tree$tip.label, dat_organ$phylo))
dat_organ <- dat_organ[dat_organ$phylo %in% tree_organ$tip.label, ]
# Re-check for mismatches after pruning
setdiff(tree_organ$tip.label, dat_organ$phylo)character(0)
setdiff(dat_organ$phylo, tree_organ$tip.label)character(0)
# Calculate branch lengths using Grafen's method
tree_organ <- compute.brlen(tree_organ, method = "Grafen")
is.ultrametric(tree_organ)[1] TRUE
# Compute phylogenetic covariance matrix
A_organ <- ape::vcv.phylo(tree_organ)model_simple_heart <- brm(
log10_organ_size ~
1 +
log10_body_size +
(1 + log10_body_size | sex),
data = dat_organ,
family = gaussian(),
prior = c(
prior(normal(0.75, 0.5), class = "b", coef = "log10_body_size"),
prior(normal(0, 2), class = "Intercept"),
prior(student_t(3, 0, 0.5), class = "sd"),
prior(student_t(3, 0, 1), class = "sigma")
),
sample_prior = TRUE,
chains = 4,
cores = 4,
threads = threading(2),
iter = 4000,
warmup = 2000,
control = list(adapt_delta = 0.95),
save_pars = save_pars(all = TRUE),
silent = TRUE, refresh = 0
)Running MCMC with 4 parallel chains, with 2 thread(s) per chain...
Chain 3 finished in 1.7 seconds.
Chain 1 finished in 1.9 seconds.
Chain 4 finished in 2.2 seconds.
Chain 2 finished in 2.8 seconds.
All 4 chains finished successfully.
Mean chain execution time: 2.1 seconds.
Total execution time: 2.9 seconds.
pp_check(model_simple_heart)summary(model_simple_heart) Family: gaussian
Links: mu = identity; sigma = identity
Formula: log10_organ_size ~ 1 + log10_body_size + (1 + log10_body_size | sex)
Data: dat_organ (Number of observations: 58)
Draws: 4 chains, each with iter = 4000; warmup = 2000; thin = 1;
total post-warmup draws = 8000
Multilevel Hyperparameters:
~sex (Number of levels: 2)
Estimate Est.Error l-95% CI u-95% CI Rhat
sd(Intercept) 0.23 0.25 0.01 0.95 1.00
sd(log10_body_size) 0.16 0.18 0.00 0.64 1.01
cor(Intercept,log10_body_size) -0.07 0.61 -0.98 0.96 1.00
Bulk_ESS Tail_ESS
sd(Intercept) 2491 3144
sd(log10_body_size) 618 274
cor(Intercept,log10_body_size) 3163 3177
Regression Coefficients:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
Intercept -2.95 0.21 -3.35 -2.47 1.01 725 344
log10_body_size 1.22 0.14 0.77 1.45 1.01 519 110
Further Distributional Parameters:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sigma 0.24 0.03 0.20 0.30 1.01 963 164
Draws were sampled using sample(hmc). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).
I will next extract, for each sex, the estimated slope and intercept, together with the 95% credible intervals.
# Extract combined coefficients (fixed + random)
coef_sex <-
coef(model_simple_heart)$sex %>%
as_tibble(rownames = "sex") %>%
select(sex, matches("Estimate|Q2.5|Q97.5")) %>%
transmute(
sex,
slope = Estimate.log10_body_size,
slope_low = Q2.5.log10_body_size,
slope_high = Q97.5.log10_body_size,
)
coef_sex# A tibble: 2 × 4
sex slope slope_low slope_high
<chr> <dbl> <dbl> <dbl>
1 female 1.25 1.18 1.32
2 male 1.24 1.18 1.31
Lets now visualise these estimates
# Create new data with sequences for each sex
new_data <- dat_organ %>%
group_by(sex) %>%
summarise(
min_body = min(log10_body_size),
max_body = max(log10_body_size),
.groups = "drop"
) %>%
rowwise() %>%
mutate(
log10_body_size = list(seq(min_body, max_body, length.out = 100))
) %>%
unnest(log10_body_size) %>%
select(sex, log10_body_size)
# Generate posterior predicted draws
draws_by_sex <- epred_draws(
model_simple_heart,
newdata = new_data,
re_formula = ~ (1 + log10_body_size | sex)
)# Prepare labels
label_pos <- dat_organ %>%
group_by(sex) %>%
summarise(
x_lab = -0.9,
y_lab = 2.2,
.groups = "drop"
)
# Build the label text
slopes_labels <- coef_sex %>%
left_join(label_pos, by = "sex") %>%
mutate(
label = sprintf("β = %.2f [%.2f, %.2f]", slope, slope_low, slope_high)
)
# Plot points and predicted lines with credible intervals
ggplot() +
geom_point(
data = dat_organ,
aes(x = log10_body_size, y = log10_organ_size),
shape = 21, colour = "black", fill = "grey",
alpha = 0.5, size = 3
) +
stat_lineribbon(
data = draws_by_sex,
aes(x = log10_body_size, y = .epred, fill = stat(.width)),
alpha = 0.5, colour = NA
) +
geom_text(
data = slopes_labels,
aes(x = -0.9, y = 2.2, label = label),
hjust = 0, vjust = 1, size = 4, font = 2
) +
scale_fill_distiller(palette = "Purples", direction = 1) +
labs(
x = expression(log[10]*" Body size"),
y = expression(log[10]*" Heart size"),
title = "Heart size ~ Body size + (1 + Body size | Sex)"
) +
facet_wrap(vars(sex), nrow = 1) +
theme_bw() +
theme(
plot.title = element_text(size = 14),
strip.text = element_text(size = 12),
axis.title.x = element_text(size = 14),
axis.title.y = element_text(size = 14),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12),
legend.position = "none"
)model_phylo_heart <- brm(
log10_organ_size ~
1 +
log10_body_size +
(1 + log10_body_size | sex) +
(1 | gr(phylo, cov = A_organ)),
data = dat_organ,
data2 = list(A_organ = A_organ),
family = gaussian(),
prior = c(
prior(normal(0.75, 0.5), class = "b", coef = "log10_body_size"),
prior(normal(0, 2), class = "Intercept"),
prior(student_t(3, 0, 0.5), class = "sd"),
prior(student_t(3, 0, 1), class = "sigma")
),
sample_prior = TRUE,
chains = 4,
cores = 4,
threads = threading(2),
iter = 4000,
warmup = 2000,
control = list(adapt_delta = 0.95),
save_pars = save_pars(all = TRUE),
silent = TRUE, refresh = 0
)Running MCMC with 4 parallel chains, with 2 thread(s) per chain...
Chain 1 finished in 9.5 seconds.
Chain 4 finished in 10.4 seconds.
Chain 3 finished in 10.5 seconds.
Chain 2 finished in 10.9 seconds.
All 4 chains finished successfully.
Mean chain execution time: 10.3 seconds.
Total execution time: 11.0 seconds.
pp_check(model_phylo_heart)summary(model_phylo_heart) Family: gaussian
Links: mu = identity; sigma = identity
Formula: log10_organ_size ~ 1 + log10_body_size + (1 + log10_body_size | sex) + (1 | gr(phylo, cov = A_organ))
Data: dat_organ (Number of observations: 58)
Draws: 4 chains, each with iter = 4000; warmup = 2000; thin = 1;
total post-warmup draws = 8000
Multilevel Hyperparameters:
~phylo (Number of levels: 12)
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sd(Intercept) 0.68 0.20 0.37 1.14 1.00 1141 2129
~sex (Number of levels: 2)
Estimate Est.Error l-95% CI u-95% CI Rhat
sd(Intercept) 0.19 0.25 0.00 0.88 1.00
sd(log10_body_size) 0.16 0.21 0.00 0.98 1.01
cor(Intercept,log10_body_size) -0.06 0.61 -0.98 0.96 1.00
Bulk_ESS Tail_ESS
sd(Intercept) 2927 3476
sd(log10_body_size) 290 54
cor(Intercept,log10_body_size) 5259 5004
Regression Coefficients:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
Intercept -2.31 0.46 -3.15 -1.37 1.01 1024 3164
log10_body_size 0.79 0.21 0.31 1.10 1.01 286 53
Further Distributional Parameters:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sigma 0.09 0.01 0.07 0.11 1.01 2790 3932
Draws were sampled using sample(hmc). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).
hyp <- paste(
"sd_phylo__Intercept^2 /",
"(sd_phylo__Intercept^2 + sd_sex__Intercept^2 + sigma^2) = 0"
)
(hyp <- hypothesis(model_phylo_heart, hyp, class = NULL))Hypothesis Tests for class :
Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio
1 (sd_phylo__Interc... = 0 0.87 0.18 0.32 0.99 0
Post.Prob Star
1 0 *
---
'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
'*': For one-sided hypotheses, the posterior probability exceeds 95%;
for two-sided hypotheses, the value tested against lies outside the 95%-CI.
Posterior probabilities of point hypotheses assume equal prior probabilities.
lambda_heart <- hyp$hypothesis$EstimateI will next extract, for each sex, the estimated slope and intercept, together with the 95% credible intervals.
# Extract combined coefficients (fixed + random)
coef_sex <-
coef(model_phylo_heart)$sex %>%
as_tibble(rownames = "sex") %>%
select(sex, matches("Estimate|Q2.5|Q97.5")) %>%
transmute(
sex,
slope = Estimate.log10_body_size,
slope_low = Q2.5.log10_body_size,
slope_high = Q97.5.log10_body_size,
)
coef_sex# A tibble: 2 × 4
sex slope slope_low slope_high
<chr> <dbl> <dbl> <dbl>
1 female 0.803 0.567 1.02
2 male 0.822 0.611 1.03
Lets now visualise these estimates
# Create new data with sequences for each sex
new_data <- dat_organ %>%
group_by(sex) %>%
summarise(
min_body = min(log10_body_size),
max_body = max(log10_body_size),
.groups = "drop"
) %>%
rowwise() %>%
mutate(
log10_body_size = list(seq(min_body, max_body, length.out = 100))
) %>%
unnest(log10_body_size) %>%
select(sex, log10_body_size)
# Generate posterior predicted draws
draws_by_sex <- epred_draws(
model_phylo_heart,
newdata = new_data,
re_formula = ~ (1 + log10_body_size | sex)
)# Prepare labels
label_pos <- dat_organ %>%
group_by(sex) %>%
summarise(
x_lab = -0.9,
y_lab = 2.2,
.groups = "drop"
)
# Build the label text
slopes_labels <- coef_sex %>%
left_join(label_pos, by = "sex") %>%
mutate(
label = sprintf("β = %.2f [%.2f, %.2f]", slope, slope_low, slope_high)
)
# Plot points and predicted lines with credible intervals
ggplot() +
geom_point(
data = dat_organ,
aes(x = log10_body_size, y = log10_organ_size),
shape = 21, colour = "black", fill = "grey",
alpha = 0.5, size = 3
) +
stat_lineribbon(
data = draws_by_sex,
aes(x = log10_body_size, y = .epred, fill = stat(.width)),
alpha = 0.5, colour = NA
) +
geom_text(
data = slopes_labels,
aes(x = -0.9, y = 2.2, label = label),
hjust = 0, vjust = 1, size = 4, font = 2
) +
scale_fill_distiller(palette = "Purples", direction = 1) +
labs(
x = expression(log[10]*" Body size"),
y = expression(log[10]*" Heart size"),
title = "Heart size ~ Body size + (1 + Body size | Sex) + (1 | Phylogeny)"
) +
facet_wrap(vars(sex), nrow = 1) +
theme_bw() +
theme(
plot.title = element_text(size = 14),
strip.text = element_text(size = 12),
axis.title.x = element_text(size = 14),
axis.title.y = element_text(size = 14),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12),
legend.position = "none"
)loo_simple <- loo(model_simple_heart)
loo_phylo <- loo(model_phylo_heart)
loo_compare(loo_simple, loo_phylo) elpd_diff se_diff
model_phylo_heart 0.0 0.0
model_simple_heart -54.4 6.8
In this comparison, the model that accounts for phylogeny is clearly preferable to the model that treats species as independent observational units. The phylogenetic model (model_phylo_heart) is used as the reference with elpd_diff = 0, whereas the simpler model (model_simple_heart) has an expected log predictive density (ELPD) that is about 54.1 units lower, with an associated standard error of 6.9. In the context of leave‑one‑out cross‑validation, differences exceeding roughly four times their standard error are typically regarded as strong evidence. Here, 54.1/6.9≈7.8, indicating a very large difference relative to its uncertainty, and thus providing strong support for the phylogenetic model.
# Filter organ of interest
dat_organ <- dat %>%
group_by(organ_grouped, sex) %>%
filter(n_distinct(species) >= 5) %>%
ungroup() %>%
filter(organ_grouped == "pancreas")
# Check for database/tree species mismatches
setdiff(tree$tip.label, dat_organ$phylo) # Tree species absent in database [1] "Rattus_norvegicus" "Meriones_unguiculatus"
[3] "Lasiopodomys_brandtii" "Pteropus_scapulatus"
[5] "Pteropus_poliocephalus" "Pteropus_alecto"
[7] "Ficedula_hypoleuca" "Plectrophenax_nivalis"
[9] "Pica_pica" "Tringa_glareola"
[11] "Gavia_arctica" "Lagopus_muta"
[13] "Lagopus_lagopus" "Tetrao_urogallus"
[15] "Coturnix_japonica" "Numida_meleagris"
[17] "Aythya_fuligula" "Anas_acuta"
[19] "Melanitta_nigra" "Phrynocephalus_vlangalii"
[21] "Oreochromis_niloticus" "Poecilia_reticulata"
[23] "Anguilla_anguilla"
setdiff(dat_organ$phylo, tree$tip.label) # Database species absent in treecharacter(0)
# Exclude mismatched species for consistency
tree_organ <- keep.tip(tree, intersect(tree$tip.label, dat_organ$phylo))
dat_organ <- dat_organ[dat_organ$phylo %in% tree_organ$tip.label, ]
# Re-check for mismatches after pruning
setdiff(tree_organ$tip.label, dat_organ$phylo)character(0)
setdiff(dat_organ$phylo, tree_organ$tip.label)character(0)
# Calculate branch lengths using Grafen's method
tree_organ <- compute.brlen(tree_organ, method = "Grafen")
is.ultrametric(tree_organ)[1] TRUE
# Compute phylogenetic covariance matrix
A_organ <- ape::vcv.phylo(tree_organ)model_simple_pancreas <- brm(
log10_organ_size ~
1 +
log10_body_size +
(1 + log10_body_size | sex),
data = dat_organ,
family = gaussian(),
prior = c(
prior(normal(0.75, 0.5), class = "b", coef = "log10_body_size"),
prior(normal(0, 2), class = "Intercept"),
prior(student_t(3, 0, 0.5), class = "sd"),
prior(student_t(3, 0, 1), class = "sigma")
),
sample_prior = TRUE,
chains = 4,
cores = 4,
threads = threading(2),
iter = 4000,
warmup = 2000,
control = list(adapt_delta = 0.95),
save_pars = save_pars(all = TRUE),
silent = TRUE, refresh = 0
)Running MCMC with 4 parallel chains, with 2 thread(s) per chain...
Chain 2 finished in 2.1 seconds.
Chain 3 finished in 2.0 seconds.
Chain 4 finished in 2.3 seconds.
Chain 1 finished in 2.6 seconds.
All 4 chains finished successfully.
Mean chain execution time: 2.2 seconds.
Total execution time: 2.7 seconds.
pp_check(model_simple_pancreas)summary(model_simple_pancreas) Family: gaussian
Links: mu = identity; sigma = identity
Formula: log10_organ_size ~ 1 + log10_body_size + (1 + log10_body_size | sex)
Data: dat_organ (Number of observations: 12)
Draws: 4 chains, each with iter = 4000; warmup = 2000; thin = 1;
total post-warmup draws = 8000
Multilevel Hyperparameters:
~sex (Number of levels: 2)
Estimate Est.Error l-95% CI u-95% CI Rhat
sd(Intercept) 0.26 0.28 0.01 1.01 1.00
sd(log10_body_size) 0.14 0.16 0.00 0.59 1.00
cor(Intercept,log10_body_size) -0.08 0.60 -0.98 0.95 1.00
Bulk_ESS Tail_ESS
sd(Intercept) 2345 2324
sd(log10_body_size) 1739 2024
cor(Intercept,log10_body_size) 3550 3755
Regression Coefficients:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
Intercept -1.84 0.26 -2.40 -1.34 1.00 3275 3269
log10_body_size 0.73 0.11 0.48 0.96 1.00 1878 1683
Further Distributional Parameters:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sigma 0.10 0.03 0.06 0.17 1.00 4306 4055
Draws were sampled using sample(hmc). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).
I will next extract, for each sex, the estimated slope and intercept, together with the 95% credible intervals.
# Extract combined coefficients (fixed + random)
coef_sex <-
coef(model_simple_pancreas)$sex %>%
as_tibble(rownames = "sex") %>%
select(sex, matches("Estimate|Q2.5|Q97.5")) %>%
transmute(
sex,
slope = Estimate.log10_body_size,
slope_low = Q2.5.log10_body_size,
slope_high = Q97.5.log10_body_size,
)
coef_sex# A tibble: 2 × 4
sex slope slope_low slope_high
<chr> <dbl> <dbl> <dbl>
1 female 0.731 0.646 0.818
2 male 0.735 0.655 0.818
Lets now visualise these estimates
# Create new data with sequences for each sex
new_data <- dat_organ %>%
group_by(sex) %>%
summarise(
min_body = min(log10_body_size),
max_body = max(log10_body_size),
.groups = "drop"
) %>%
rowwise() %>%
mutate(
log10_body_size = list(seq(min_body, max_body, length.out = 100))
) %>%
unnest(log10_body_size) %>%
select(sex, log10_body_size)
# Generate posterior predicted draws
draws_by_sex <- epred_draws(
model_simple_pancreas,
newdata = new_data,
re_formula = ~ (1 + log10_body_size | sex)
)# Prepare labels
label_pos <- dat_organ %>%
group_by(sex) %>%
summarise(
x_lab = 1.1,
y_lab = 2.2,
.groups = "drop"
)
# Build the label text
slopes_labels <- coef_sex %>%
left_join(label_pos, by = "sex") %>%
mutate(
label = sprintf("β = %.2f [%.2f, %.2f]", slope, slope_low, slope_high)
)
# Plot points and predicted lines with credible intervals
ggplot() +
geom_point(
data = dat_organ,
aes(x = log10_body_size, y = log10_organ_size),
shape = 21, colour = "black", fill = "grey",
alpha = 0.5, size = 3
) +
stat_lineribbon(
data = draws_by_sex,
aes(x = log10_body_size, y = .epred, fill = stat(.width)),
alpha = 0.5, colour = NA
) +
geom_text(
data = slopes_labels,
aes(x = 1.1, y = 2.2, label = label),
hjust = 0, vjust = 1, size = 4, font = 2
) +
scale_fill_distiller(palette = "Purples", direction = 1) +
labs(
x = expression(log[10]*" Body size"),
y = expression(log[10]*" Pancreas size"),
title = "Pancreas size ~ Body size + (1 + Body size | Sex)"
) +
facet_wrap(vars(sex), nrow = 1) +
theme_bw() +
theme(
plot.title = element_text(size = 14),
strip.text = element_text(size = 12),
axis.title.x = element_text(size = 14),
axis.title.y = element_text(size = 14),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12),
legend.position = "none"
)model_phylo_pancreas <- brm(
log10_organ_size ~
1 +
log10_body_size +
(1 + log10_body_size | sex) +
(1 | gr(phylo, cov = A_organ)),
data = dat_organ,
data2 = list(A_organ = A_organ),
family = gaussian(),
prior = c(
prior(normal(0.75, 0.5), class = "b", coef = "log10_body_size"),
prior(normal(0, 2), class = "Intercept"),
prior(student_t(3, 0, 0.5), class = "sd"),
prior(student_t(3, 0, 1), class = "sigma")
),
sample_prior = TRUE,
chains = 4,
cores = 4,
threads = threading(2),
iter = 4000,
warmup = 2000,
control = list(adapt_delta = 0.95),
save_pars = save_pars(all = TRUE),
silent = TRUE, refresh = 0
)Running MCMC with 4 parallel chains, with 2 thread(s) per chain...
Chain 4 finished in 4.5 seconds.
Chain 3 finished in 4.8 seconds.
Chain 1 finished in 5.3 seconds.
Chain 2 finished in 6.4 seconds.
All 4 chains finished successfully.
Mean chain execution time: 5.2 seconds.
Total execution time: 6.6 seconds.
pp_check(model_phylo_pancreas)summary(model_phylo_pancreas) Family: gaussian
Links: mu = identity; sigma = identity
Formula: log10_organ_size ~ 1 + log10_body_size + (1 + log10_body_size | sex) + (1 | gr(phylo, cov = A_organ))
Data: dat_organ (Number of observations: 12)
Draws: 4 chains, each with iter = 4000; warmup = 2000; thin = 1;
total post-warmup draws = 8000
Multilevel Hyperparameters:
~phylo (Number of levels: 6)
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sd(Intercept) 0.16 0.12 0.01 0.46 1.00 988 1599
~sex (Number of levels: 2)
Estimate Est.Error l-95% CI u-95% CI Rhat
sd(Intercept) 0.23 0.26 0.01 0.92 1.00
sd(log10_body_size) 0.13 0.16 0.00 0.58 1.00
cor(Intercept,log10_body_size) -0.08 0.62 -0.98 0.96 1.00
Bulk_ESS Tail_ESS
sd(Intercept) 3048 3611
sd(log10_body_size) 1960 2517
cor(Intercept,log10_body_size) 4652 3822
Regression Coefficients:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
Intercept -1.73 0.36 -2.41 -0.90 1.00 2647 3022
log10_body_size 0.70 0.13 0.39 0.95 1.00 1610 1326
Further Distributional Parameters:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sigma 0.06 0.03 0.03 0.14 1.00 1044 2455
Draws were sampled using sample(hmc). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).
hyp <- paste(
"sd_phylo__Intercept^2 /",
"(sd_phylo__Intercept^2 + sd_sex__Intercept^2 + sigma^2) = 0"
)
(hyp <- hypothesis(model_phylo_pancreas, hyp, class = NULL))Hypothesis Tests for class :
Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio
1 (sd_phylo__Interc... = 0 0.43 0.34 0 0.98 0.42
Post.Prob Star
1 0.3 *
---
'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
'*': For one-sided hypotheses, the posterior probability exceeds 95%;
for two-sided hypotheses, the value tested against lies outside the 95%-CI.
Posterior probabilities of point hypotheses assume equal prior probabilities.
lambda_pancreas <- hyp$hypothesis$EstimateI will next extract, for each sex, the estimated slope and intercept, together with the 95% credible intervals.
# Extract combined coefficients (fixed + random)
coef_sex <-
coef(model_phylo_pancreas)$sex %>%
as_tibble(rownames = "sex") %>%
select(sex, matches("Estimate|Q2.5|Q97.5")) %>%
transmute(
sex,
slope = Estimate.log10_body_size,
slope_low = Q2.5.log10_body_size,
slope_high = Q97.5.log10_body_size,
)
coef_sex# A tibble: 2 × 4
sex slope slope_low slope_high
<chr> <dbl> <dbl> <dbl>
1 female 0.693 0.450 0.845
2 male 0.698 0.462 0.843
Lets now visualise these estimates
# Create new data with sequences for each sex
new_data <- dat_organ %>%
group_by(sex) %>%
summarise(
min_body = min(log10_body_size),
max_body = max(log10_body_size),
.groups = "drop"
) %>%
rowwise() %>%
mutate(
log10_body_size = list(seq(min_body, max_body, length.out = 100))
) %>%
unnest(log10_body_size) %>%
select(sex, log10_body_size)
# Generate posterior predicted draws
draws_by_sex <- epred_draws(
model_phylo_pancreas,
newdata = new_data,
re_formula = ~ (1 + log10_body_size | sex)
)# Prepare labels
label_pos <- dat_organ %>%
group_by(sex) %>%
summarise(
x_lab = 1.1,
y_lab = 2.2,
.groups = "drop"
)
# Build the label text
slopes_labels <- coef_sex %>%
left_join(label_pos, by = "sex") %>%
mutate(
label = sprintf("β = %.2f [%.2f, %.2f]", slope, slope_low, slope_high)
)
# Plot points and predicted lines with credible intervals
ggplot() +
geom_point(
data = dat_organ,
aes(x = log10_body_size, y = log10_organ_size),
shape = 21, colour = "black", fill = "grey",
alpha = 0.5, size = 3
) +
stat_lineribbon(
data = draws_by_sex,
aes(x = log10_body_size, y = .epred, fill = stat(.width)),
alpha = 0.5, colour = NA
) +
geom_text(
data = slopes_labels,
aes(x = 1.1, y = 2.2, label = label),
hjust = 0, vjust = 1, size = 4, font = 2
) +
scale_fill_distiller(palette = "Purples", direction = 1) +
labs(
x = expression(log[10]*" Body size"),
y = expression(log[10]*" Pancreas size"),
title = "Pancreas size ~ Body size + (1 + Body size | Sex) + (1 | Phylogeny)"
) +
facet_wrap(vars(sex), nrow = 1) +
theme_bw() +
theme(
plot.title = element_text(size = 14),
strip.text = element_text(size = 12),
axis.title.x = element_text(size = 14),
axis.title.y = element_text(size = 14),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12),
legend.position = "none"
)loo_simple <- loo(model_simple_pancreas)
loo_phylo <- loo(model_phylo_pancreas)
loo_compare(loo_simple, loo_phylo) elpd_diff se_diff
model_phylo_pancreas 0.0 0.0
model_simple_pancreas -2.7 1.2
In this comparison, the model that accounts for phylogeny is clearly preferable to the model that treats species as independent observational units. The phylogenetic model (model_phylo_pancreas) is used as the reference with elpd_diff = 0, whereas the simpler model (model_simple_pancreas) has an expected log predictive density (ELPD) that is about 3.2 units lower, with an associated standard error of 1.1. In the context of leave‑one‑out cross‑validation, differences exceeding four times their standard error are typically regarded as strong evidence. Here, 3.2/1.1≈2.9, indicating a similar support for both models.
# Filter organ of interest
dat_organ <- dat %>%
group_by(organ_grouped, sex) %>%
filter(n_distinct(species) >= 5) %>%
ungroup() %>%
filter(organ_grouped == "pituitary glands")
# Check for database/tree species mismatches
setdiff(tree$tip.label, dat_organ$phylo) # Tree species absent in database [1] "Mus_musculus" "Meriones_unguiculatus"
[3] "Lasiopodomys_brandtii" "Ficedula_hypoleuca"
[5] "Plectrophenax_nivalis" "Pica_pica"
[7] "Tringa_glareola" "Gavia_arctica"
[9] "Lagopus_muta" "Lagopus_lagopus"
[11] "Tetrao_urogallus" "Gallus_gallus"
[13] "Coturnix_japonica" "Numida_meleagris"
[15] "Aythya_fuligula" "Anas_platyrhynchos"
[17] "Anas_acuta" "Melanitta_nigra"
[19] "Phrynocephalus_vlangalii" "Oreochromis_niloticus"
[21] "Poecilia_reticulata" "Anguilla_anguilla"
setdiff(dat_organ$phylo, tree$tip.label) # Database species absent in treecharacter(0)
# Exclude mismatched species for consistency
tree_organ <- keep.tip(tree, intersect(tree$tip.label, dat_organ$phylo))
dat_organ <- dat_organ[dat_organ$phylo %in% tree_organ$tip.label, ]
# Re-check for mismatches after pruning
setdiff(tree_organ$tip.label, dat_organ$phylo)character(0)
setdiff(dat_organ$phylo, tree_organ$tip.label)character(0)
# Calculate branch lengths using Grafen's method
tree_organ <- compute.brlen(tree_organ, method = "Grafen")
is.ultrametric(tree_organ)[1] TRUE
# Compute phylogenetic covariance matrix
A_organ <- ape::vcv.phylo(tree_organ)model_simple_pituitary_glands <- brm(
log10_organ_size ~
1 +
log10_body_size +
(1 + log10_body_size | sex),
data = dat_organ,
family = gaussian(),
prior = c(
prior(normal(0.75, 0.5), class = "b", coef = "log10_body_size"),
prior(normal(0, 2), class = "Intercept"),
prior(student_t(3, 0, 0.5), class = "sd"),
prior(student_t(3, 0, 1), class = "sigma")
),
sample_prior = TRUE,
chains = 4,
cores = 4,
threads = threading(2),
iter = 4000,
warmup = 2000,
control = list(adapt_delta = 0.95),
save_pars = save_pars(all = TRUE),
silent = TRUE, refresh = 0
)Running MCMC with 4 parallel chains, with 2 thread(s) per chain...
Chain 1 finished in 1.1 seconds.
Chain 4 finished in 1.0 seconds.
Chain 3 finished in 1.2 seconds.
Chain 2 finished in 1.4 seconds.
All 4 chains finished successfully.
Mean chain execution time: 1.2 seconds.
Total execution time: 1.5 seconds.
pp_check(model_simple_pituitary_glands)summary(model_simple_pituitary_glands) Family: gaussian
Links: mu = identity; sigma = identity
Formula: log10_organ_size ~ 1 + log10_body_size + (1 + log10_body_size | sex)
Data: dat_organ (Number of observations: 14)
Draws: 4 chains, each with iter = 4000; warmup = 2000; thin = 1;
total post-warmup draws = 8000
Multilevel Hyperparameters:
~sex (Number of levels: 2)
Estimate Est.Error l-95% CI u-95% CI Rhat
sd(Intercept) 0.44 0.50 0.01 1.91 1.10
sd(log10_body_size) 0.17 0.19 0.00 0.70 1.04
cor(Intercept,log10_body_size) -0.16 0.61 -0.97 0.94 1.04
Bulk_ESS Tail_ESS
sd(Intercept) 32 24
sd(log10_body_size) 1794 1870
cor(Intercept,log10_body_size) 68 1829
Regression Coefficients:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
Intercept -3.31 0.79 -4.39 -0.88 1.11 25 14
log10_body_size 0.67 0.18 0.34 1.05 1.02 193 289
Further Distributional Parameters:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sigma 0.24 0.06 0.16 0.38 1.01 1537 3913
Draws were sampled using sample(hmc). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).
I will next extract, for each sex, the estimated slope and intercept, together with the 95% credible intervals.
# Extract combined coefficients (fixed + random)
coef_sex <-
coef(model_simple_pituitary_glands)$sex %>%
as_tibble(rownames = "sex") %>%
select(sex, matches("Estimate|Q2.5|Q97.5")) %>%
transmute(
sex,
slope = Estimate.log10_body_size,
slope_low = Q2.5.log10_body_size,
slope_high = Q97.5.log10_body_size,
)
coef_sex# A tibble: 2 × 4
sex slope slope_low slope_high
<chr> <dbl> <dbl> <dbl>
1 female 0.659 0.393 0.918
2 male 0.652 0.395 0.911
Lets now visualise these estimates
# Create new data with sequences for each sex
new_data <- dat_organ %>%
group_by(sex) %>%
summarise(
min_body = min(log10_body_size),
max_body = max(log10_body_size),
.groups = "drop"
) %>%
rowwise() %>%
mutate(
log10_body_size = list(seq(min_body, max_body, length.out = 100))
) %>%
unnest(log10_body_size) %>%
select(sex, log10_body_size)
# Generate posterior predicted draws
draws_by_sex <- epred_draws(
model_simple_pituitary_glands,
newdata = new_data,
re_formula = ~ (1 + log10_body_size | sex)
)# Prepare labels
label_pos <- dat_organ %>%
group_by(sex) %>%
summarise(
x_lab = 1.1,
y_lab = -0.5,
.groups = "drop"
)
# Build the label text
slopes_labels <- coef_sex %>%
left_join(label_pos, by = "sex") %>%
mutate(
label = sprintf("β = %.2f [%.2f, %.2f]", slope, slope_low, slope_high)
)
# Plot points and predicted lines with credible intervals
ggplot() +
geom_point(
data = dat_organ,
aes(x = log10_body_size, y = log10_organ_size),
shape = 21, colour = "black", fill = "grey",
alpha = 0.5, size = 3
) +
stat_lineribbon(
data = draws_by_sex,
aes(x = log10_body_size, y = .epred, fill = stat(.width)),
alpha = 0.5, colour = NA
) +
geom_text(
data = slopes_labels,
aes(x = 1.1, y = -0.5, label = label),
hjust = 0, vjust = 1, size = 4, font = 2
) +
scale_fill_distiller(palette = "Purples", direction = 1) +
labs(
x = expression(log[10]*" Body size"),
y = expression(log[10]*" Pituitary glands size"),
title = "Pituitary glands size ~ Body size + (1 + Body size | Sex)"
) +
facet_wrap(vars(sex), nrow = 1) +
theme_bw() +
theme(
plot.title = element_text(size = 14),
strip.text = element_text(size = 12),
axis.title.x = element_text(size = 14),
axis.title.y = element_text(size = 14),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12),
legend.position = "none"
)model_phylo_pituitary_glands <- brm(
log10_organ_size ~
1 +
log10_body_size +
(1 + log10_body_size | sex) +
(1 | gr(phylo, cov = A_organ)),
data = dat_organ,
data2 = list(A_organ = A_organ),
family = gaussian(),
prior = c(
prior(normal(0.75, 0.5), class = "b", coef = "log10_body_size"),
prior(normal(0, 2), class = "Intercept"),
prior(student_t(3, 0, 0.5), class = "sd"),
prior(student_t(3, 0, 1), class = "sigma")
),
sample_prior = TRUE,
chains = 4,
cores = 4,
threads = threading(2),
iter = 4000,
warmup = 2000,
control = list(adapt_delta = 0.95),
save_pars = save_pars(all = TRUE),
silent = TRUE, refresh = 0
)Running MCMC with 4 parallel chains, with 2 thread(s) per chain...
Chain 3 finished in 4.6 seconds.
Chain 4 finished in 5.2 seconds.
Chain 1 finished in 5.6 seconds.
Chain 2 finished in 5.7 seconds.
All 4 chains finished successfully.
Mean chain execution time: 5.3 seconds.
Total execution time: 5.8 seconds.
pp_check(model_phylo_pituitary_glands)summary(model_phylo_pituitary_glands) Family: gaussian
Links: mu = identity; sigma = identity
Formula: log10_organ_size ~ 1 + log10_body_size + (1 + log10_body_size | sex) + (1 | gr(phylo, cov = A_organ))
Data: dat_organ (Number of observations: 14)
Draws: 4 chains, each with iter = 4000; warmup = 2000; thin = 1;
total post-warmup draws = 8000
Multilevel Hyperparameters:
~phylo (Number of levels: 7)
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sd(Intercept) 0.32 0.13 0.15 0.64 1.00 1983 2320
~sex (Number of levels: 2)
Estimate Est.Error l-95% CI u-95% CI Rhat
sd(Intercept) 0.28 0.30 0.01 1.10 1.00
sd(log10_body_size) 0.17 0.20 0.00 0.68 1.00
cor(Intercept,log10_body_size) -0.06 0.61 -0.97 0.95 1.00
Bulk_ESS Tail_ESS
sd(Intercept) 3212 3473
sd(log10_body_size) 1626 2324
cor(Intercept,log10_body_size) 4817 3691
Regression Coefficients:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
Intercept -3.09 0.60 -4.26 -1.87 1.00 3350 4217
log10_body_size 0.56 0.22 0.15 1.03 1.00 1646 923
Further Distributional Parameters:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sigma 0.08 0.04 0.04 0.18 1.00 2121 2251
Draws were sampled using sample(hmc). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).
hyp <- paste(
"sd_phylo__Intercept^2 /",
"(sd_phylo__Intercept^2 + sd_sex__Intercept^2 + sigma^2) = 0"
)
(hyp <- hypothesis(model_phylo_pituitary_glands, hyp, class = NULL))Hypothesis Tests for class :
Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio
1 (sd_phylo__Interc... = 0 0.6 0.3 0.05 0.98 0.08
Post.Prob Star
1 0.08 *
---
'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
'*': For one-sided hypotheses, the posterior probability exceeds 95%;
for two-sided hypotheses, the value tested against lies outside the 95%-CI.
Posterior probabilities of point hypotheses assume equal prior probabilities.
lambda_pituitary_glands <- hyp$hypothesis$EstimateI will next extract, for each sex, the estimated slope and intercept, together with the 95% credible intervals.
# Extract combined coefficients (fixed + random)
coef_sex <-
coef(model_phylo_pituitary_glands)$sex %>%
as_tibble(rownames = "sex") %>%
select(sex, matches("Estimate|Q2.5|Q97.5")) %>%
transmute(
sex,
slope = Estimate.log10_body_size,
slope_low = Q2.5.log10_body_size,
slope_high = Q97.5.log10_body_size,
)
coef_sex# A tibble: 2 × 4
sex slope slope_low slope_high
<chr> <dbl> <dbl> <dbl>
1 female 0.521 0.161 0.879
2 male 0.517 0.172 0.858
Lets now visualise these estimates
# Create new data with sequences for each sex
new_data <- dat_organ %>%
group_by(sex) %>%
summarise(
min_body = min(log10_body_size),
max_body = max(log10_body_size),
.groups = "drop"
) %>%
rowwise() %>%
mutate(
log10_body_size = list(seq(min_body, max_body, length.out = 100))
) %>%
unnest(log10_body_size) %>%
select(sex, log10_body_size)
# Generate posterior predicted draws
draws_by_sex <- epred_draws(
model_phylo_pituitary_glands,
newdata = new_data,
re_formula = ~ (1 + log10_body_size | sex)
)# Prepare labels
label_pos <- dat_organ %>%
group_by(sex) %>%
summarise(
x_lab = 1.1,
y_lab = -0.5,
.groups = "drop"
)
# Build the label text
slopes_labels <- coef_sex %>%
left_join(label_pos, by = "sex") %>%
mutate(
label = sprintf("β = %.2f [%.2f, %.2f]", slope, slope_low, slope_high)
)
# Plot points and predicted lines with credible intervals
ggplot() +
geom_point(
data = dat_organ,
aes(x = log10_body_size, y = log10_organ_size),
shape = 21, colour = "black", fill = "grey",
alpha = 0.5, size = 3
) +
stat_lineribbon(
data = draws_by_sex,
aes(x = log10_body_size, y = .epred, fill = stat(.width)),
alpha = 0.5, colour = NA
) +
geom_text(
data = slopes_labels,
aes(x = 1.1, y = -0.5, label = label),
hjust = 0, vjust = 1, size = 4, font = 2
) +
scale_fill_distiller(palette = "Purples", direction = 1) +
labs(
x = expression(log[10]*" Body size"),
y = expression(log[10]*" Pituitary glands size"),
title = "Pituitary glands size ~ Body size + (1 + Body size | Sex) + (1 | Phylogeny)"
) +
facet_wrap(vars(sex), nrow = 1) +
theme_bw() +
theme(
plot.title = element_text(size = 14),
strip.text = element_text(size = 12),
axis.title.x = element_text(size = 14),
axis.title.y = element_text(size = 14),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12),
legend.position = "none"
)loo_simple <- loo(model_simple_pituitary_glands)
loo_phylo <- loo(model_phylo_pituitary_glands)
loo_compare(loo_simple, loo_phylo) elpd_diff se_diff
model_phylo_pituitary_glands 0.0 0.0
model_simple_pituitary_glands -12.2 1.5
In this comparison, the model that accounts for phylogeny is clearly preferable to the model that treats species as independent observational units. The phylogenetic model (model_phylo_pituitary_glands) is used as the reference with elpd_diff = 0, whereas the simpler model (model_simple_pituitary_glands) has an expected log predictive density (ELPD) that is about 12.1 units lower, with an associated standard error of 1.3 In the context of leave‑one‑out cross‑validation, differences exceeding roughly four times their standard error are typically regarded as strong evidence. Here, 12.1/1.3≈9.3, indicating a large difference relative to its uncertainty, and thus providing strong support for the phylogenetic model.
# Filter organ of interest
dat_organ <- dat %>%
group_by(organ_grouped, sex) %>%
filter(n_distinct(species) >= 5) %>%
ungroup() %>%
filter(organ_grouped == "spleen")
# Check for database/tree species mismatches
setdiff(tree$tip.label, dat_organ$phylo) # Tree species absent in database [1] "Pteropus_scapulatus" "Pteropus_poliocephalus"
[3] "Pteropus_alecto" "Plectrophenax_nivalis"
[5] "Pica_pica" "Tringa_glareola"
[7] "Gavia_arctica" "Lagopus_muta"
[9] "Lagopus_lagopus" "Tetrao_urogallus"
[11] "Numida_meleagris" "Aythya_fuligula"
[13] "Anas_acuta" "Melanitta_nigra"
[15] "Phrynocephalus_vlangalii" "Oreochromis_niloticus"
[17] "Poecilia_reticulata" "Anguilla_anguilla"
setdiff(dat_organ$phylo, tree$tip.label) # Database species absent in treecharacter(0)
# Exclude mismatched species for consistency
tree_organ <- keep.tip(tree, intersect(tree$tip.label, dat_organ$phylo))
dat_organ <- dat_organ[dat_organ$phylo %in% tree_organ$tip.label, ]
# Re-check for mismatches after pruning
setdiff(tree_organ$tip.label, dat_organ$phylo)character(0)
setdiff(dat_organ$phylo, tree_organ$tip.label)character(0)
# Calculate branch lengths using Grafen's method
tree_organ <- compute.brlen(tree_organ, method = "Grafen")
is.ultrametric(tree_organ)[1] TRUE
# Compute phylogenetic covariance matrix
A_organ <- ape::vcv.phylo(tree_organ)model_simple_spleen <- brm(
log10_organ_size ~
1 +
log10_body_size +
(1 + log10_body_size | sex),
data = dat_organ,
family = gaussian(),
prior = c(
prior(normal(0.75, 0.5), class = "b", coef = "log10_body_size"),
prior(normal(0, 2), class = "Intercept"),
prior(student_t(3, 0, 0.5), class = "sd"),
prior(student_t(3, 0, 1), class = "sigma")
),
sample_prior = TRUE,
chains = 4,
cores = 4,
threads = threading(2),
iter = 4000,
warmup = 2000,
control = list(adapt_delta = 0.95),
save_pars = save_pars(all = TRUE),
silent = TRUE, refresh = 0
)Running MCMC with 4 parallel chains, with 2 thread(s) per chain...
Chain 3 finished in 1.6 seconds.
Chain 1 finished in 1.8 seconds.
Chain 2 finished in 1.7 seconds.
Chain 4 finished in 2.0 seconds.
All 4 chains finished successfully.
Mean chain execution time: 1.8 seconds.
Total execution time: 2.2 seconds.
pp_check(model_simple_spleen)summary(model_simple_spleen) Family: gaussian
Links: mu = identity; sigma = identity
Formula: log10_organ_size ~ 1 + log10_body_size + (1 + log10_body_size | sex)
Data: dat_organ (Number of observations: 62)
Draws: 4 chains, each with iter = 4000; warmup = 2000; thin = 1;
total post-warmup draws = 8000
Multilevel Hyperparameters:
~sex (Number of levels: 2)
Estimate Est.Error l-95% CI u-95% CI Rhat
sd(Intercept) 0.27 0.28 0.01 1.02 1.02
sd(log10_body_size) 0.19 0.23 0.00 0.96 1.02
cor(Intercept,log10_body_size) -0.03 0.62 -0.97 0.95 1.01
Bulk_ESS Tail_ESS
sd(Intercept) 405 2836
sd(log10_body_size) 214 56
cor(Intercept,log10_body_size) 777 3778
Regression Coefficients:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
Intercept -3.10 0.26 -3.63 -2.53 1.01 2680 2658
log10_body_size 1.08 0.12 0.78 1.34 1.00 1348 1267
Further Distributional Parameters:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sigma 0.33 0.03 0.28 0.40 1.00 5571 5066
Draws were sampled using sample(hmc). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).
I will next extract, for each sex, the estimated slope and intercept, together with the 95% credible intervals.
# Extract combined coefficients (fixed + random)
coef_sex <-
coef(model_simple_spleen)$sex %>%
as_tibble(rownames = "sex") %>%
select(sex, matches("Estimate|Q2.5|Q97.5")) %>%
transmute(
sex,
slope = Estimate.log10_body_size,
slope_low = Q2.5.log10_body_size,
slope_high = Q97.5.log10_body_size,
)
coef_sex# A tibble: 2 × 4
sex slope slope_low slope_high
<chr> <dbl> <dbl> <dbl>
1 female 1.10 0.986 1.22
2 male 1.10 0.993 1.21
Lets now visualise these estimates
# Create new data with sequences for each sex
new_data <- dat_organ %>%
group_by(sex) %>%
summarise(
min_body = min(log10_body_size),
max_body = max(log10_body_size),
.groups = "drop"
) %>%
rowwise() %>%
mutate(
log10_body_size = list(seq(min_body, max_body, length.out = 100))
) %>%
unnest(log10_body_size) %>%
select(sex, log10_body_size)
# Generate posterior predicted draws
draws_by_sex <- epred_draws(
model_simple_spleen,
newdata = new_data,
re_formula = ~ (1 + log10_body_size | sex)
)# Prepare labels
label_pos <- dat_organ %>%
group_by(sex) %>%
summarise(
x_lab = 1.1,
y_lab = 2.2,
.groups = "drop"
)
# Build the label text
slopes_labels <- coef_sex %>%
left_join(label_pos, by = "sex") %>%
mutate(
label = sprintf("β = %.2f [%.2f, %.2f]", slope, slope_low, slope_high)
)
# Plot points and predicted lines with credible intervals
ggplot() +
geom_point(
data = dat_organ,
aes(x = log10_body_size, y = log10_organ_size),
shape = 21, colour = "black", fill = "grey",
alpha = 0.5, size = 3
) +
stat_lineribbon(
data = draws_by_sex,
aes(x = log10_body_size, y = .epred, fill = stat(.width)),
alpha = 0.5, colour = NA
) +
geom_text(
data = slopes_labels,
aes(x = 1.1, y = 2.2, label = label),
hjust = 0, vjust = 1, size = 4, font = 2
) +
scale_fill_distiller(palette = "Purples", direction = 1) +
labs(
x = expression(log[10]*" Body size"),
y = expression(log[10]*" Spleen size"),
title = "Spleen size ~ Body size + (1 + Body size | Sex)"
) +
facet_wrap(vars(sex), nrow = 1) +
theme_bw() +
theme(
plot.title = element_text(size = 14),
strip.text = element_text(size = 12),
axis.title.x = element_text(size = 14),
axis.title.y = element_text(size = 14),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12),
legend.position = "none"
)model_phylo_spleen <- brm(
log10_organ_size ~
1 +
log10_body_size +
(1 + log10_body_size | sex) +
(1 | gr(phylo, cov = A_organ)),
data = dat_organ,
data2 = list(A_organ = A_organ),
family = gaussian(),
prior = c(
prior(normal(0.75, 0.5), class = "b", coef = "log10_body_size"),
prior(normal(0, 2), class = "Intercept"),
prior(student_t(3, 0, 0.5), class = "sd"),
prior(student_t(3, 0, 1), class = "sigma")
),
sample_prior = TRUE,
chains = 4,
cores = 4,
threads = threading(2),
iter = 4000,
warmup = 2000,
control = list(adapt_delta = 0.95),
save_pars = save_pars(all = TRUE),
silent = TRUE, refresh = 0
)Running MCMC with 4 parallel chains, with 2 thread(s) per chain...
Chain 4 finished in 4.7 seconds.
Chain 3 finished in 5.7 seconds.
Chain 1 finished in 6.8 seconds.
Chain 2 finished in 7.1 seconds.
All 4 chains finished successfully.
Mean chain execution time: 6.1 seconds.
Total execution time: 7.2 seconds.
pp_check(model_phylo_spleen)summary(model_phylo_spleen) Family: gaussian
Links: mu = identity; sigma = identity
Formula: log10_organ_size ~ 1 + log10_body_size + (1 + log10_body_size | sex) + (1 | gr(phylo, cov = A_organ))
Data: dat_organ (Number of observations: 62)
Draws: 4 chains, each with iter = 4000; warmup = 2000; thin = 1;
total post-warmup draws = 8000
Multilevel Hyperparameters:
~phylo (Number of levels: 11)
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sd(Intercept) 0.59 0.16 0.35 0.97 1.00 2480 4430
~sex (Number of levels: 2)
Estimate Est.Error l-95% CI u-95% CI Rhat
sd(Intercept) 0.26 0.29 0.01 1.05 1.00
sd(log10_body_size) 0.16 0.19 0.00 0.70 1.00
cor(Intercept,log10_body_size) -0.05 0.61 -0.97 0.95 1.00
Bulk_ESS Tail_ESS
sd(Intercept) 2681 3275
sd(log10_body_size) 2221 3296
cor(Intercept,log10_body_size) 4800 4164
Regression Coefficients:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
Intercept -2.89 0.50 -3.85 -1.86 1.00 2167 2756
log10_body_size 0.95 0.17 0.57 1.28 1.00 1914 2240
Further Distributional Parameters:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sigma 0.18 0.02 0.15 0.23 1.00 4337 1672
Draws were sampled using sample(hmc). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).
hyp <- paste(
"sd_phylo__Intercept^2 /",
"(sd_phylo__Intercept^2 + sd_sex__Intercept^2 + sigma^2) = 0"
)
(hyp <- hypothesis(model_phylo_spleen, hyp, class = NULL))Hypothesis Tests for class :
Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio
1 (sd_phylo__Interc... = 0 0.75 0.2 0.2 0.96 0
Post.Prob Star
1 0 *
---
'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
'*': For one-sided hypotheses, the posterior probability exceeds 95%;
for two-sided hypotheses, the value tested against lies outside the 95%-CI.
Posterior probabilities of point hypotheses assume equal prior probabilities.
lambda_spleen <- hyp$hypothesis$EstimateI will next extract, for each sex, the estimated slope and intercept, together with the 95% credible intervals.
# Extract combined coefficients (fixed + random)
coef_sex <-
coef(model_phylo_spleen)$sex %>%
as_tibble(rownames = "sex") %>%
select(sex, matches("Estimate|Q2.5|Q97.5")) %>%
transmute(
sex,
slope = Estimate.log10_body_size,
slope_low = Q2.5.log10_body_size,
slope_high = Q97.5.log10_body_size,
)
coef_sex# A tibble: 2 × 4
sex slope slope_low slope_high
<chr> <dbl> <dbl> <dbl>
1 female 0.963 0.721 1.18
2 male 0.958 0.724 1.16
Lets now visualise these estimates
# Create new data with sequences for each sex
new_data <- dat_organ %>%
group_by(sex) %>%
summarise(
min_body = min(log10_body_size),
max_body = max(log10_body_size),
.groups = "drop"
) %>%
rowwise() %>%
mutate(
log10_body_size = list(seq(min_body, max_body, length.out = 100))
) %>%
unnest(log10_body_size) %>%
select(sex, log10_body_size)
# Generate posterior predicted draws
draws_by_sex <- epred_draws(
model_phylo_spleen,
newdata = new_data,
re_formula = ~ (1 + log10_body_size | sex)
)# Prepare labels
label_pos <- dat_organ %>%
group_by(sex) %>%
summarise(
x_lab = 1.1,
y_lab = 2.2,
.groups = "drop"
)
# Build the label text
slopes_labels <- coef_sex %>%
left_join(label_pos, by = "sex") %>%
mutate(
label = sprintf("β = %.2f [%.2f, %.2f]", slope, slope_low, slope_high)
)
# Plot points and predicted lines with credible intervals
ggplot() +
geom_point(
data = dat_organ,
aes(x = log10_body_size, y = log10_organ_size),
shape = 21, colour = "black", fill = "grey",
alpha = 0.5, size = 3
) +
stat_lineribbon(
data = draws_by_sex,
aes(x = log10_body_size, y = .epred, fill = stat(.width)),
alpha = 0.5, colour = NA
) +
geom_text(
data = slopes_labels,
aes(x = 1.1, y = 2.2, label = label),
hjust = 0, vjust = 1, size = 4, font = 2
) +
scale_fill_distiller(palette = "Purples", direction = 1) +
labs(
x = expression(log[10]*" Body size"),
y = expression(log[10]*" Spleen size"),
title = "Spleen size ~ Body size + (1 + Body size | Sex) + (1 | Phylogeny)"
) +
facet_wrap(vars(sex), nrow = 1) +
theme_bw() +
theme(
plot.title = element_text(size = 14),
strip.text = element_text(size = 12),
axis.title.x = element_text(size = 14),
axis.title.y = element_text(size = 14),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12),
legend.position = "none"
)loo_simple <- loo(model_simple_spleen)
loo_phylo <- loo(model_phylo_spleen)
loo_compare(loo_simple, loo_phylo) elpd_diff se_diff
model_phylo_spleen 0.0 0.0
model_simple_spleen -32.4 5.4
In this comparison, the model that accounts for phylogeny is clearly preferable to the model that treats species as independent observational units. The phylogenetic model (model_phylo_spleen) is used as the reference with elpd_diff = 0, whereas the simpler model (model_simple_spleen) has an expected log predictive density (ELPD) that is about 32.5 units lower, with an associated standard error of 5.4. In the context of leave‑one‑out cross‑validation, differences exceeding roughly four times their standard error are typically regarded as strong evidence. Here, 32.5/5.4≈6, indicating a very large difference relative to its uncertainty, and thus providing strong support for the phylogenetic model.
# Filter organ of interest
dat_organ <- dat %>%
group_by(organ_grouped, sex) %>%
# filter(n_distinct(species) >= 3) %>%
ungroup() %>%
filter(organ_grouped == "thymus")
# We removed the filter of more than 5 species per sex and organ here
# Check for database/tree species mismatches
setdiff(tree$tip.label, dat_organ$phylo) # Tree species absent in database [1] "Mus_musculus" "Meriones_unguiculatus"
[3] "Lasiopodomys_brandtii" "Saimiri_sciureus_sciureus"
[5] "Pteropus_scapulatus" "Pteropus_poliocephalus"
[7] "Pteropus_alecto" "Ficedula_hypoleuca"
[9] "Plectrophenax_nivalis" "Pica_pica"
[11] "Tringa_glareola" "Gavia_arctica"
[13] "Lagopus_muta" "Lagopus_lagopus"
[15] "Tetrao_urogallus" "Gallus_gallus"
[17] "Coturnix_japonica" "Numida_meleagris"
[19] "Aythya_fuligula" "Anas_platyrhynchos"
[21] "Anas_acuta" "Melanitta_nigra"
[23] "Phrynocephalus_vlangalii" "Oreochromis_niloticus"
[25] "Poecilia_reticulata" "Anguilla_anguilla"
setdiff(dat_organ$phylo, tree$tip.label) # Database species absent in treecharacter(0)
# Exclude mismatched species for consistency
tree_organ <- keep.tip(tree, intersect(tree$tip.label, dat_organ$phylo))
dat_organ <- dat_organ[dat_organ$phylo %in% tree_organ$tip.label, ]
# Re-check for mismatches after pruning
setdiff(tree_organ$tip.label, dat_organ$phylo)character(0)
setdiff(dat_organ$phylo, tree_organ$tip.label)character(0)
# Calculate branch lengths using Grafen's method
tree_organ <- compute.brlen(tree_organ, method = "Grafen")
is.ultrametric(tree_organ)[1] TRUE
# Compute phylogenetic covariance matrix
A_organ <- ape::vcv.phylo(tree_organ)model_simple_thymus <- brm(
log10_organ_size ~
1 +
log10_body_size +
(1 + log10_body_size | sex),
data = dat_organ,
family = gaussian(),
prior = c(
prior(normal(0.75, 0.5), class = "b", coef = "log10_body_size"),
prior(normal(0, 2), class = "Intercept"),
prior(student_t(3, 0, 0.5), class = "sd"),
prior(student_t(3, 0, 1), class = "sigma")
),
sample_prior = TRUE,
chains = 4,
cores = 4,
threads = threading(2),
iter = 4000,
warmup = 2000,
control = list(adapt_delta = 0.95),
save_pars = save_pars(all = TRUE),
silent = TRUE, refresh = 0
)Running MCMC with 4 parallel chains, with 2 thread(s) per chain...
Chain 1 finished in 1.3 seconds.
Chain 3 finished in 1.4 seconds.
Chain 4 finished in 1.4 seconds.
Chain 2 finished in 1.6 seconds.
All 4 chains finished successfully.
Mean chain execution time: 1.4 seconds.
Total execution time: 1.7 seconds.
pp_check(model_simple_thymus)summary(model_simple_thymus) Family: gaussian
Links: mu = identity; sigma = identity
Formula: log10_organ_size ~ 1 + log10_body_size + (1 + log10_body_size | sex)
Data: dat_organ (Number of observations: 14)
Draws: 4 chains, each with iter = 4000; warmup = 2000; thin = 1;
total post-warmup draws = 8000
Multilevel Hyperparameters:
~sex (Number of levels: 2)
Estimate Est.Error l-95% CI u-95% CI Rhat
sd(Intercept) 0.31 0.31 0.01 1.11 1.00
sd(log10_body_size) 0.18 0.19 0.00 0.66 1.00
cor(Intercept,log10_body_size) -0.11 0.60 -0.98 0.94 1.00
Bulk_ESS Tail_ESS
sd(Intercept) 2920 2626
sd(log10_body_size) 1527 1463
cor(Intercept,log10_body_size) 3718 3604
Regression Coefficients:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
Intercept -1.82 0.41 -2.65 -1.01 1.00 4173 4260
log10_body_size 0.60 0.16 0.29 0.93 1.00 2138 2002
Further Distributional Parameters:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sigma 0.21 0.05 0.14 0.33 1.00 4018 3652
Draws were sampled using sample(hmc). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).
I will next extract, for each sex, the estimated slope and intercept, together with the 95% credible intervals.
# Extract combined coefficients (fixed + random)
coef_sex <-
coef(model_simple_thymus)$sex %>%
as_tibble(rownames = "sex") %>%
select(sex, matches("Estimate|Q2.5|Q97.5")) %>%
transmute(
sex,
slope = Estimate.log10_body_size,
slope_low = Q2.5.log10_body_size,
slope_high = Q97.5.log10_body_size,
)
coef_sex# A tibble: 2 × 4
sex slope slope_low slope_high
<chr> <dbl> <dbl> <dbl>
1 female 0.578 0.365 0.800
2 male 0.604 0.390 0.817
Lets now visualise these estimates
# Create new data with sequences for each sex
new_data <- dat_organ %>%
group_by(sex) %>%
summarise(
min_body = min(log10_body_size),
max_body = max(log10_body_size),
.groups = "drop"
) %>%
rowwise() %>%
mutate(
log10_body_size = list(seq(min_body, max_body, length.out = 100))
) %>%
unnest(log10_body_size) %>%
select(sex, log10_body_size)
# Generate posterior predicted draws
draws_by_sex <- epred_draws(
model_simple_thymus,
newdata = new_data,
re_formula = ~ (1 + log10_body_size | sex)
)# Prepare labels
label_pos <- dat_organ %>%
group_by(sex) %>%
summarise(
x_lab = 1.1,
y_lab = 1,
.groups = "drop"
)
# Build the label text
slopes_labels <- coef_sex %>%
left_join(label_pos, by = "sex") %>%
mutate(
label = sprintf("β = %.2f [%.2f, %.2f]", slope, slope_low, slope_high)
)
# Plot points and predicted lines with credible intervals
ggplot() +
geom_point(
data = dat_organ,
aes(x = log10_body_size, y = log10_organ_size),
shape = 21, colour = "black", fill = "grey",
alpha = 0.5, size = 3
) +
stat_lineribbon(
data = draws_by_sex,
aes(x = log10_body_size, y = .epred, fill = stat(.width)),
alpha = 0.5, colour = NA
) +
geom_text(
data = slopes_labels,
aes(x = 1.1, y = 1, label = label),
hjust = 0, vjust = 1, size = 4, font = 2
) +
scale_fill_distiller(palette = "Purples", direction = 1) +
labs(
x = expression(log[10]*" Body size"),
y = expression(log[10]*" Thymus size"),
title = "Thymus size ~ Body size + (1 + Body size | Sex)"
) +
facet_wrap(vars(sex), nrow = 1) +
theme_bw() +
theme(
plot.title = element_text(size = 14),
strip.text = element_text(size = 12),
axis.title.x = element_text(size = 14),
axis.title.y = element_text(size = 14),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12),
legend.position = "none"
)model_phylo_thymus <- brm(
log10_organ_size ~
1 +
log10_body_size +
(1 + log10_body_size | sex) +
(1 | gr(phylo, cov = A_organ)),
data = dat_organ,
data2 = list(A_organ = A_organ),
family = gaussian(),
prior = c(
prior(normal(0.75, 0.5), class = "b", coef = "log10_body_size"),
prior(normal(0, 2), class = "Intercept"),
prior(student_t(3, 0, 0.5), class = "sd"),
prior(student_t(3, 0, 1), class = "sigma")
),
sample_prior = TRUE,
chains = 4,
cores = 4,
threads = threading(2),
iter = 4000,
warmup = 2000,
control = list(adapt_delta = 0.95),
save_pars = save_pars(all = TRUE),
silent = TRUE, refresh = 0
)Running MCMC with 4 parallel chains, with 2 thread(s) per chain...
Chain 1 finished in 2.3 seconds.
Chain 3 finished in 2.2 seconds.
Chain 4 finished in 2.9 seconds.
Chain 2 finished in 3.0 seconds.
All 4 chains finished successfully.
Mean chain execution time: 2.6 seconds.
Total execution time: 3.2 seconds.
pp_check(model_phylo_thymus)summary(model_phylo_thymus) Family: gaussian
Links: mu = identity; sigma = identity
Formula: log10_organ_size ~ 1 + log10_body_size + (1 + log10_body_size | sex) + (1 | gr(phylo, cov = A_organ))
Data: dat_organ (Number of observations: 14)
Draws: 4 chains, each with iter = 4000; warmup = 2000; thin = 1;
total post-warmup draws = 8000
Multilevel Hyperparameters:
~phylo (Number of levels: 3)
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sd(Intercept) 0.31 0.25 0.02 0.93 1.00 2438 2364
~sex (Number of levels: 2)
Estimate Est.Error l-95% CI u-95% CI Rhat
sd(Intercept) 0.32 0.33 0.01 1.17 1.00
sd(log10_body_size) 0.20 0.23 0.01 0.86 1.00
cor(Intercept,log10_body_size) -0.09 0.59 -0.97 0.94 1.00
Bulk_ESS Tail_ESS
sd(Intercept) 3556 2708
sd(log10_body_size) 1483 889
cor(Intercept,log10_body_size) 5383 4612
Regression Coefficients:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
Intercept -1.32 0.85 -2.85 0.46 1.00 2794 2098
log10_body_size 0.46 0.26 -0.09 0.96 1.00 2689 2779
Further Distributional Parameters:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sigma 0.18 0.05 0.11 0.31 1.00 3686 4279
Draws were sampled using sample(hmc). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).
hyp <- paste(
"sd_phylo__Intercept^2 /",
"(sd_phylo__Intercept^2 + sd_sex__Intercept^2 + sigma^2) = 0"
)
(hyp <- hypothesis(model_phylo_thymus, hyp, class = NULL))Hypothesis Tests for class :
Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio
1 (sd_phylo__Interc... = 0 0.4 0.3 0 0.94 0.43
Post.Prob Star
1 0.3 *
---
'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
'*': For one-sided hypotheses, the posterior probability exceeds 95%;
for two-sided hypotheses, the value tested against lies outside the 95%-CI.
Posterior probabilities of point hypotheses assume equal prior probabilities.
lambda_thymus <- hyp$hypothesis$EstimateI will next extract, for each sex, the estimated slope and intercept, together with the 95% credible intervals.
# Extract combined coefficients (fixed + random)
coef_sex <-
coef(model_phylo_thymus)$sex %>%
as_tibble(rownames = "sex") %>%
select(sex, matches("Estimate|Q2.5|Q97.5")) %>%
transmute(
sex,
slope = Estimate.log10_body_size,
slope_low = Q2.5.log10_body_size,
slope_high = Q97.5.log10_body_size,
)
coef_sex# A tibble: 2 × 4
sex slope slope_low slope_high
<chr> <dbl> <dbl> <dbl>
1 female 0.408 -0.131 0.856
2 male 0.441 -0.0683 0.875
Lets now visualise these estimates
# Create new data with sequences for each sex
new_data <- dat_organ %>%
group_by(sex) %>%
summarise(
min_body = min(log10_body_size),
max_body = max(log10_body_size),
.groups = "drop"
) %>%
rowwise() %>%
mutate(
log10_body_size = list(seq(min_body, max_body, length.out = 100))
) %>%
unnest(log10_body_size) %>%
select(sex, log10_body_size)
# Generate posterior predicted draws
draws_by_sex <- epred_draws(
model_phylo_thymus,
newdata = new_data,
re_formula = ~ (1 + log10_body_size | sex)
)# Prepare labels
label_pos <- dat_organ %>%
group_by(sex) %>%
summarise(
x_lab = 1.1,
y_lab = 2.2,
.groups = "drop"
)
# Build the label text
slopes_labels <- coef_sex %>%
left_join(label_pos, by = "sex") %>%
mutate(
label = sprintf("β = %.2f [%.2f, %.2f]", slope, slope_low, slope_high)
)
# Plot points and predicted lines with credible intervals
ggplot() +
geom_point(
data = dat_organ,
aes(x = log10_body_size, y = log10_organ_size),
shape = 21, colour = "black", fill = "grey",
alpha = 0.5, size = 3
) +
stat_lineribbon(
data = draws_by_sex,
aes(x = log10_body_size, y = .epred, fill = stat(.width)),
alpha = 0.5, colour = NA
) +
geom_text(
data = slopes_labels,
aes(x = 1.1, y = 2.2, label = label),
hjust = 0, vjust = 1, size = 4, font = 2
) +
scale_fill_distiller(palette = "Purples", direction = 1) +
labs(
x = expression(log[10]*" Body size"),
y = expression(log[10]*" Thymus size"),
title = "Thymus size ~ Body size + (1 + Body size | Sex) + (1 | Phylogeny)"
) +
facet_wrap(vars(sex), nrow = 1) +
theme_bw() +
theme(
plot.title = element_text(size = 14),
strip.text = element_text(size = 12),
axis.title.x = element_text(size = 14),
axis.title.y = element_text(size = 14),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12),
legend.position = "none"
)loo_simple <- loo(model_simple_thymus)
loo_phylo <- loo(model_phylo_thymus)
loo_compare(loo_simple, loo_phylo) elpd_diff se_diff
model_phylo_thymus 0.0 0.0
model_simple_thymus -1.0 1.1
In this comparison, the model that accounts for phylogeny is clearly preferable to the model that treats species as independent observational units. The phylogenetic model (model_phylo_thymus) is used as the reference with elpd_diff = 0, whereas the simpler model (model_simple_thymus) has an expected log predictive density (ELPD) that is about 0.6 units lower, with an associated standard error of 1.3 In the context of leave‑one‑out cross‑validation, differences exceeding roughly four times their standard error are typically regarded as strong evidence. Here, 0.6/1.3≈0.46, indicating both models are similarly good.
# Filter organ of interest
dat_organ <- dat %>%
group_by(organ_grouped, sex) %>%
filter(n_distinct(species) >= 5) %>%
ungroup() %>%
filter(organ_grouped == "stomach")
# Check for database/tree species mismatches
setdiff(tree$tip.label, dat_organ$phylo) # Tree species absent in database [1] "Rattus_norvegicus" "Chlorocebus_sabaeus"
[3] "Macaca_fascicularis" "Saimiri_sciureus_sciureus"
[5] "Pteropus_scapulatus" "Pteropus_poliocephalus"
[7] "Pteropus_alecto" "Ficedula_hypoleuca"
[9] "Plectrophenax_nivalis" "Pica_pica"
[11] "Tringa_glareola" "Gavia_arctica"
[13] "Lagopus_muta" "Lagopus_lagopus"
[15] "Tetrao_urogallus" "Aythya_fuligula"
[17] "Anas_acuta" "Melanitta_nigra"
[19] "Oreochromis_niloticus" "Poecilia_reticulata"
[21] "Anguilla_anguilla"
setdiff(dat_organ$phylo, tree$tip.label) # Database species absent in treecharacter(0)
# Exclude mismatched species for consistency
tree_organ <- keep.tip(tree, intersect(tree$tip.label, dat_organ$phylo))
dat_organ <- dat_organ[dat_organ$phylo %in% tree_organ$tip.label, ]
# Re-check for mismatches after pruning
setdiff(tree_organ$tip.label, dat_organ$phylo)character(0)
setdiff(dat_organ$phylo, tree_organ$tip.label)character(0)
# Calculate branch lengths using Grafen's method
tree_organ <- compute.brlen(tree_organ, method = "Grafen")
is.ultrametric(tree_organ)[1] TRUE
# Compute phylogenetic covariance matrix
A_organ <- ape::vcv.phylo(tree_organ)model_simple_stomach <- brm(
log10_organ_size ~
1 +
log10_body_size +
(1 + log10_body_size | sex),
data = dat_organ,
family = gaussian(),
prior = c(
prior(normal(0.75, 0.5), class = "b", coef = "log10_body_size"),
prior(normal(0, 2), class = "Intercept"),
prior(student_t(3, 0, 0.5), class = "sd"),
prior(student_t(3, 0, 1), class = "sigma")
),
sample_prior = TRUE,
chains = 4,
cores = 4,
threads = threading(2),
iter = 4000,
warmup = 2000,
control = list(adapt_delta = 0.95),
save_pars = save_pars(all = TRUE),
silent = TRUE, refresh = 0
)Running MCMC with 4 parallel chains, with 2 thread(s) per chain...
Chain 2 finished in 1.1 seconds.
Chain 1 finished in 1.2 seconds.
Chain 3 finished in 1.1 seconds.
Chain 4 finished in 1.6 seconds.
All 4 chains finished successfully.
Mean chain execution time: 1.3 seconds.
Total execution time: 1.8 seconds.
pp_check(model_simple_stomach)summary(model_simple_stomach) Family: gaussian
Links: mu = identity; sigma = identity
Formula: log10_organ_size ~ 1 + log10_body_size + (1 + log10_body_size | sex)
Data: dat_organ (Number of observations: 36)
Draws: 4 chains, each with iter = 4000; warmup = 2000; thin = 1;
total post-warmup draws = 8000
Multilevel Hyperparameters:
~sex (Number of levels: 2)
Estimate Est.Error l-95% CI u-95% CI Rhat
sd(Intercept) 0.30 0.33 0.01 1.15 1.00
sd(log10_body_size) 0.19 0.21 0.00 0.76 1.00
cor(Intercept,log10_body_size) -0.09 0.60 -0.97 0.94 1.00
Bulk_ESS Tail_ESS
sd(Intercept) 2773 3182
sd(log10_body_size) 1362 871
cor(Intercept,log10_body_size) 3759 2156
Regression Coefficients:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
Intercept -2.55 0.34 -3.23 -1.83 1.00 2451 1208
log10_body_size 1.12 0.16 0.72 1.43 1.00 1127 508
Further Distributional Parameters:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sigma 0.40 0.05 0.31 0.50 1.00 5819 4926
Draws were sampled using sample(hmc). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).
I will next extract, for each sex, the estimated slope and intercept, together with the 95% credible intervals.
# Extract combined coefficients (fixed + random)
coef_sex <-
coef(model_simple_stomach)$sex %>%
as_tibble(rownames = "sex") %>%
select(sex, matches("Estimate|Q2.5|Q97.5")) %>%
transmute(
sex,
slope = Estimate.log10_body_size,
slope_low = Q2.5.log10_body_size,
slope_high = Q97.5.log10_body_size,
)
coef_sex# A tibble: 2 × 4
sex slope slope_low slope_high
<chr> <dbl> <dbl> <dbl>
1 female 1.15 0.976 1.33
2 male 1.15 0.960 1.32
Lets now visualise these estimates
# Create new data with sequences for each sex
new_data <- dat_organ %>%
group_by(sex) %>%
summarise(
min_body = min(log10_body_size),
max_body = max(log10_body_size),
.groups = "drop"
) %>%
rowwise() %>%
mutate(
log10_body_size = list(seq(min_body, max_body, length.out = 100))
) %>%
unnest(log10_body_size) %>%
select(sex, log10_body_size)
# Generate posterior predicted draws
draws_by_sex <- epred_draws(
model_simple_stomach,
newdata = new_data,
re_formula = ~ (1 + log10_body_size | sex)
)# Prepare labels
label_pos <- dat_organ %>%
group_by(sex) %>%
summarise(
x_lab = 1.1,
y_lab = 2.2,
.groups = "drop"
)
# Build the label text
slopes_labels <- coef_sex %>%
left_join(label_pos, by = "sex") %>%
mutate(
label = sprintf("β = %.2f [%.2f, %.2f]", slope, slope_low, slope_high)
)
# Plot points and predicted lines with credible intervals
ggplot() +
geom_point(
data = dat_organ,
aes(x = log10_body_size, y = log10_organ_size),
shape = 21, colour = "black", fill = "grey",
alpha = 0.5, size = 3
) +
stat_lineribbon(
data = draws_by_sex,
aes(x = log10_body_size, y = .epred, fill = stat(.width)),
alpha = 0.5, colour = NA
) +
geom_text(
data = slopes_labels,
aes(x = 1.1, y = 2.2, label = label),
hjust = 0, vjust = 1, size = 4, font = 2
) +
scale_fill_distiller(palette = "Purples", direction = 1) +
labs(
x = expression(log[10]*" Body size"),
y = expression(log[10]*" Stomach size"),
title = "Stomach size ~ Body size + (1 + Body size | Sex)"
) +
facet_wrap(vars(sex), nrow = 1) +
theme_bw() +
theme(
plot.title = element_text(size = 14),
strip.text = element_text(size = 12),
axis.title.x = element_text(size = 14),
axis.title.y = element_text(size = 14),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12),
legend.position = "none"
)model_phylo_stomach <- brm(
log10_organ_size ~
1 +
log10_body_size +
(1 + log10_body_size | sex) +
(1 | gr(phylo, cov = A_organ)),
data = dat_organ,
data2 = list(A_organ = A_organ),
family = gaussian(),
prior = c(
prior(normal(0.75, 0.5), class = "b", coef = "log10_body_size"),
prior(normal(0, 2), class = "Intercept"),
prior(student_t(3, 0, 0.5), class = "sd"),
prior(student_t(3, 0, 1), class = "sigma")
),
sample_prior = TRUE,
chains = 4,
cores = 4,
threads = threading(2),
iter = 4000,
warmup = 2000,
control = list(adapt_delta = 0.95),
save_pars = save_pars(all = TRUE),
silent = TRUE, refresh = 0
)Running MCMC with 4 parallel chains, with 2 thread(s) per chain...
Chain 1 finished in 2.3 seconds.
Chain 3 finished in 2.2 seconds.
Chain 4 finished in 2.3 seconds.
Chain 2 finished in 2.5 seconds.
All 4 chains finished successfully.
Mean chain execution time: 2.3 seconds.
Total execution time: 2.7 seconds.
pp_check(model_phylo_stomach)summary(model_phylo_stomach) Family: gaussian
Links: mu = identity; sigma = identity
Formula: log10_organ_size ~ 1 + log10_body_size + (1 + log10_body_size | sex) + (1 | gr(phylo, cov = A_organ))
Data: dat_organ (Number of observations: 36)
Draws: 4 chains, each with iter = 4000; warmup = 2000; thin = 1;
total post-warmup draws = 8000
Multilevel Hyperparameters:
~phylo (Number of levels: 8)
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sd(Intercept) 0.35 0.18 0.06 0.76 1.00 2296 2066
~sex (Number of levels: 2)
Estimate Est.Error l-95% CI u-95% CI Rhat
sd(Intercept) 0.30 0.36 0.01 1.17 1.00
sd(log10_body_size) 0.18 0.20 0.00 0.74 1.00
cor(Intercept,log10_body_size) -0.08 0.60 -0.97 0.95 1.00
Bulk_ESS Tail_ESS
sd(Intercept) 3728 3113
sd(log10_body_size) 2764 3662
cor(Intercept,log10_body_size) 6268 4931
Regression Coefficients:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
Intercept -2.25 0.53 -3.23 -1.13 1.00 3607 3153
log10_body_size 0.98 0.21 0.52 1.36 1.00 3136 3293
Further Distributional Parameters:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sigma 0.35 0.05 0.27 0.46 1.00 5247 4936
Draws were sampled using sample(hmc). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).
hyp <- paste(
"sd_phylo__Intercept^2 /",
"(sd_phylo__Intercept^2 + sd_sex__Intercept^2 + sigma^2) = 0"
)
(hyp <- hypothesis(model_phylo_stomach, hyp, class = NULL))Hypothesis Tests for class :
Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio
1 (sd_phylo__Interc... = 0 0.36 0.22 0.01 0.8 0.25
Post.Prob Star
1 0.2 *
---
'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
'*': For one-sided hypotheses, the posterior probability exceeds 95%;
for two-sided hypotheses, the value tested against lies outside the 95%-CI.
Posterior probabilities of point hypotheses assume equal prior probabilities.
lambda_stomach <- hyp$hypothesis$EstimateI will next extract, for each sex, the estimated slope and intercept, together with the 95% credible intervals.
# Extract combined coefficients (fixed + random)
coef_sex <-
coef(model_phylo_stomach)$sex %>%
as_tibble(rownames = "sex") %>%
select(sex, matches("Estimate|Q2.5|Q97.5")) %>%
transmute(
sex,
slope = Estimate.log10_body_size,
slope_low = Q2.5.log10_body_size,
slope_high = Q97.5.log10_body_size,
)
coef_sex# A tibble: 2 × 4
sex slope slope_low slope_high
<chr> <dbl> <dbl> <dbl>
1 female 1.01 0.626 1.31
2 male 1.00 0.624 1.31
Lets now visualise these estimates
# Create new data with sequences for each sex
new_data <- dat_organ %>%
group_by(sex) %>%
summarise(
min_body = min(log10_body_size),
max_body = max(log10_body_size),
.groups = "drop"
) %>%
rowwise() %>%
mutate(
log10_body_size = list(seq(min_body, max_body, length.out = 100))
) %>%
unnest(log10_body_size) %>%
select(sex, log10_body_size)
# Generate posterior predicted draws
draws_by_sex <- epred_draws(
model_phylo_stomach,
newdata = new_data,
re_formula = ~ (1 + log10_body_size | sex)
)# Prepare labels
label_pos <- dat_organ %>%
group_by(sex) %>%
summarise(
x_lab = 1.1,
y_lab = 2.2,
.groups = "drop"
)
# Build the label text
slopes_labels <- coef_sex %>%
left_join(label_pos, by = "sex") %>%
mutate(
label = sprintf("β = %.2f [%.2f, %.2f]", slope, slope_low, slope_high)
)
# Plot points and predicted lines with credible intervals
ggplot() +
geom_point(
data = dat_organ,
aes(x = log10_body_size, y = log10_organ_size),
shape = 21, colour = "black", fill = "grey",
alpha = 0.5, size = 3
) +
stat_lineribbon(
data = draws_by_sex,
aes(x = log10_body_size, y = .epred, fill = stat(.width)),
alpha = 0.5, colour = NA
) +
geom_text(
data = slopes_labels,
aes(x = 1.1, y = 2.2, label = label),
hjust = 0, vjust = 1, size = 4, font = 2
) +
scale_fill_distiller(palette = "Purples", direction = 1) +
labs(
x = expression(log[10]*" Body size"),
y = expression(log[10]*" Stomach size"),
title = "Stomach size ~ Body size + (1 + Body size | Sex) + (1 | Phylogeny)"
) +
facet_wrap(vars(sex), nrow = 1) +
theme_bw() +
theme(
plot.title = element_text(size = 14),
strip.text = element_text(size = 12),
axis.title.x = element_text(size = 14),
axis.title.y = element_text(size = 14),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12),
legend.position = "none"
)loo_simple <- loo(model_simple_stomach)
loo_phylo <- loo(model_phylo_stomach)
loo_compare(loo_simple, loo_phylo) elpd_diff se_diff
model_phylo_stomach 0.0 0.0
model_simple_stomach -2.1 2.6
In this comparison, the model that accounts for phylogeny is clearly preferable to the model that treats species as independent observational units. The phylogenetic model (model_phylo_stomach) is used as the reference with elpd_diff = 0, whereas the simpler model (model_simple_stomach) has an expected log predictive density (ELPD) that is about 1.9 units lower, with an associated standard error of 2.5 In the context of leave‑one‑out cross‑validation, differences exceeding roughly four times their standard error are typically regarded as strong evidence. Here, 1.9/2.5≈0.76, indicating both models are similarly good.
# Filter organ of interest
dat_organ <- dat %>%
group_by(organ_grouped, sex) %>%
filter(n_distinct(species) >= 1) %>%
ungroup() %>%
filter(organ_grouped == "small intestine")
# Check for database/tree species mismatches
setdiff(tree$tip.label, dat_organ$phylo) # Tree species absent in database [1] "Rattus_norvegicus" "Chlorocebus_sabaeus"
[3] "Macaca_fascicularis" "Saimiri_sciureus_sciureus"
[5] "Pteropus_scapulatus" "Pteropus_poliocephalus"
[7] "Pteropus_alecto" "Ficedula_hypoleuca"
[9] "Plectrophenax_nivalis" "Pica_pica"
[11] "Tringa_glareola" "Gavia_arctica"
[13] "Lagopus_muta" "Lagopus_lagopus"
[15] "Tetrao_urogallus" "Coturnix_japonica"
[17] "Numida_meleagris" "Aythya_fuligula"
[19] "Anas_platyrhynchos" "Anas_acuta"
[21] "Melanitta_nigra" "Phrynocephalus_vlangalii"
[23] "Oreochromis_niloticus" "Poecilia_reticulata"
[25] "Anguilla_anguilla"
setdiff(dat_organ$phylo, tree$tip.label) # Database species absent in treecharacter(0)
# Exclude mismatched species for consistency
tree_organ <- keep.tip(tree, intersect(tree$tip.label, dat_organ$phylo))
dat_organ <- dat_organ[dat_organ$phylo %in% tree_organ$tip.label, ]
# Re-check for mismatches after pruning
setdiff(tree_organ$tip.label, dat_organ$phylo)character(0)
setdiff(dat_organ$phylo, tree_organ$tip.label)character(0)
# Calculate branch lengths using Grafen's method
tree_organ <- compute.brlen(tree_organ, method = "Grafen")
is.ultrametric(tree_organ)[1] TRUE
# Compute phylogenetic covariance matrix
A_organ <- ape::vcv.phylo(tree_organ)model_simple_small_intestine <- brm(
log10_organ_size ~
1 +
log10_body_size +
(1 + log10_body_size | sex),
data = dat_organ,
family = gaussian(),
prior = c(
prior(normal(0.75, 0.5), class = "b", coef = "log10_body_size"),
prior(normal(0, 2), class = "Intercept"),
prior(student_t(3, 0, 0.5), class = "sd"),
prior(student_t(3, 0, 1), class = "sigma")
),
sample_prior = TRUE,
chains = 4,
cores = 4,
threads = threading(2),
iter = 4000,
warmup = 2000,
control = list(adapt_delta = 0.95),
save_pars = save_pars(all = TRUE),
silent = TRUE, refresh = 0
)Running MCMC with 4 parallel chains, with 2 thread(s) per chain...
Chain 1 finished in 0.6 seconds.
Chain 2 finished in 0.5 seconds.
Chain 3 finished in 0.6 seconds.
Chain 4 finished in 0.7 seconds.
All 4 chains finished successfully.
Mean chain execution time: 0.6 seconds.
Total execution time: 0.9 seconds.
pp_check(model_simple_small_intestine)summary(model_simple_small_intestine) Family: gaussian
Links: mu = identity; sigma = identity
Formula: log10_organ_size ~ 1 + log10_body_size + (1 + log10_body_size | sex)
Data: dat_organ (Number of observations: 10)
Draws: 4 chains, each with iter = 4000; warmup = 2000; thin = 1;
total post-warmup draws = 8000
Multilevel Hyperparameters:
~sex (Number of levels: 2)
Estimate Est.Error l-95% CI u-95% CI Rhat
sd(Intercept) 0.35 0.33 0.01 1.22 1.00
sd(log10_body_size) 0.24 0.23 0.01 0.89 1.00
cor(Intercept,log10_body_size) -0.09 0.59 -0.97 0.94 1.00
Bulk_ESS Tail_ESS
sd(Intercept) 3067 2506
sd(log10_body_size) 2455 3067
cor(Intercept,log10_body_size) 4396 4843
Regression Coefficients:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
Intercept -1.42 0.52 -2.47 -0.38 1.00 4933 4634
log10_body_size 0.87 0.26 0.36 1.37 1.00 2894 2822
Further Distributional Parameters:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sigma 0.44 0.13 0.26 0.77 1.00 4993 4810
Draws were sampled using sample(hmc). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).
I will next extract, for each sex, the estimated slope and intercept, together with the 95% credible intervals.
# Extract combined coefficients (fixed + random)
coef_sex <-
coef(model_simple_small_intestine)$sex %>%
as_tibble(rownames = "sex") %>%
select(sex, matches("Estimate|Q2.5|Q97.5")) %>%
transmute(
sex,
slope = Estimate.log10_body_size,
slope_low = Q2.5.log10_body_size,
slope_high = Q97.5.log10_body_size,
)
coef_sex# A tibble: 2 × 4
sex slope slope_low slope_high
<chr> <dbl> <dbl> <dbl>
1 female 0.892 0.428 1.33
2 male 0.901 0.440 1.36
Lets now visualise these estimates
# Create new data with sequences for each sex
new_data <- dat_organ %>%
group_by(sex) %>%
summarise(
min_body = min(log10_body_size),
max_body = max(log10_body_size),
.groups = "drop"
) %>%
rowwise() %>%
mutate(
log10_body_size = list(seq(min_body, max_body, length.out = 100))
) %>%
unnest(log10_body_size) %>%
select(sex, log10_body_size)
# Generate posterior predicted draws
draws_by_sex <- epred_draws(
model_simple_small_intestine,
newdata = new_data,
re_formula = ~ (1 + log10_body_size | sex)
)# Prepare labels
label_pos <- dat_organ %>%
group_by(sex) %>%
summarise(
x_lab = 1.1,
y_lab = 2.2,
.groups = "drop"
)
# Build the label text
slopes_labels <- coef_sex %>%
left_join(label_pos, by = "sex") %>%
mutate(
label = sprintf("β = %.2f [%.2f, %.2f]", slope, slope_low, slope_high)
)
# Plot points and predicted lines with credible intervals
ggplot() +
geom_point(
data = dat_organ,
aes(x = log10_body_size, y = log10_organ_size),
shape = 21, colour = "black", fill = "grey",
alpha = 0.5, size = 3
) +
stat_lineribbon(
data = draws_by_sex,
aes(x = log10_body_size, y = .epred, fill = stat(.width)),
alpha = 0.5, colour = NA
) +
geom_text(
data = slopes_labels,
aes(x = 1.1, y = 2.2, label = label),
hjust = 0, vjust = 1, size = 4, font = 2
) +
scale_fill_distiller(palette = "Purples", direction = 1) +
labs(
x = expression(log[10]*" Body size"),
y = expression(log[10]*" Small intestine size"),
title = "Small intestine size ~ Body size + (1 + Body size | Sex)"
) +
facet_wrap(vars(sex), nrow = 1) +
theme_bw() +
theme(
plot.title = element_text(size = 14),
strip.text = element_text(size = 12),
axis.title.x = element_text(size = 14),
axis.title.y = element_text(size = 14),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12),
legend.position = "none"
)model_phylo_small_intestine <- brm(
log10_organ_size ~
1 +
log10_body_size +
(1 + log10_body_size | sex) +
(1 | gr(phylo, cov = A_organ)),
data = dat_organ,
data2 = list(A_organ = A_organ),
family = gaussian(),
prior = c(
prior(normal(0.75, 0.5), class = "b", coef = "log10_body_size"),
prior(normal(0, 2), class = "Intercept"),
prior(student_t(3, 0, 0.5), class = "sd"),
prior(student_t(3, 0, 1), class = "sigma")
),
sample_prior = TRUE,
chains = 4,
cores = 4,
threads = threading(2),
iter = 4000,
warmup = 2000,
control = list(adapt_delta = 0.95),
save_pars = save_pars(all = TRUE),
silent = TRUE, refresh = 0
)Running MCMC with 4 parallel chains, with 2 thread(s) per chain...
Chain 2 finished in 4.9 seconds.
Chain 1 finished in 5.4 seconds.
Chain 4 finished in 5.6 seconds.
Chain 3 finished in 6.4 seconds.
All 4 chains finished successfully.
Mean chain execution time: 5.6 seconds.
Total execution time: 6.6 seconds.
pp_check(model_phylo_small_intestine)summary(model_phylo_small_intestine) Family: gaussian
Links: mu = identity; sigma = identity
Formula: log10_organ_size ~ 1 + log10_body_size + (1 + log10_body_size | sex) + (1 | gr(phylo, cov = A_organ))
Data: dat_organ (Number of observations: 10)
Draws: 4 chains, each with iter = 4000; warmup = 2000; thin = 1;
total post-warmup draws = 8000
Multilevel Hyperparameters:
~phylo (Number of levels: 4)
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sd(Intercept) 0.63 0.28 0.29 1.35 1.01 3036 3884
~sex (Number of levels: 2)
Estimate Est.Error l-95% CI u-95% CI Rhat
sd(Intercept) 0.26 0.28 0.01 0.98 1.01
sd(log10_body_size) 0.18 0.21 0.00 0.76 1.01
cor(Intercept,log10_body_size) -0.12 0.61 -0.98 0.94 1.01
Bulk_ESS Tail_ESS
sd(Intercept) 803 416
sd(log10_body_size) 732 2544
cor(Intercept,log10_body_size) 852 421
Regression Coefficients:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
Intercept -1.15 0.91 -2.93 0.72 1.00 2994 4007
log10_body_size 0.74 0.35 0.04 1.41 1.00 3722 4472
Further Distributional Parameters:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sigma 0.06 0.03 0.03 0.14 1.00 1851 2142
Draws were sampled using sample(hmc). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).
hyp <- paste(
"sd_phylo__Intercept^2 /",
"(sd_phylo__Intercept^2 + sd_sex__Intercept^2 + sigma^2) = 0"
)
(hyp <- hypothesis(model_phylo_small_intestine, hyp, class = NULL))Hypothesis Tests for class :
Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio
1 (sd_phylo__Interc... = 0 0.8 0.23 0.19 1 0
Post.Prob Star
1 0 *
---
'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
'*': For one-sided hypotheses, the posterior probability exceeds 95%;
for two-sided hypotheses, the value tested against lies outside the 95%-CI.
Posterior probabilities of point hypotheses assume equal prior probabilities.
lambda_small_intestine <- hyp$hypothesis$EstimateI will next extract, for each sex, the estimated slope and intercept, together with the 95% credible intervals.
# Extract combined coefficients (fixed + random)
coef_sex <-
coef(model_phylo_small_intestine)$sex %>%
as_tibble(rownames = "sex") %>%
select(sex, matches("Estimate|Q2.5|Q97.5")) %>%
transmute(
sex,
slope = Estimate.log10_body_size,
slope_low = Q2.5.log10_body_size,
slope_high = Q97.5.log10_body_size,
)
coef_sex# A tibble: 2 × 4
sex slope slope_low slope_high
<chr> <dbl> <dbl> <dbl>
1 female 0.743 0.0163 1.45
2 male 0.766 0.0451 1.49
Lets now visualise these estimates
# Create new data with sequences for each sex
new_data <- dat_organ %>%
group_by(sex) %>%
summarise(
min_body = min(log10_body_size),
max_body = max(log10_body_size),
.groups = "drop"
) %>%
rowwise() %>%
mutate(
log10_body_size = list(seq(min_body, max_body, length.out = 100))
) %>%
unnest(log10_body_size) %>%
select(sex, log10_body_size)
# Generate posterior predicted draws
draws_by_sex <- epred_draws(
model_phylo_small_intestine,
newdata = new_data,
re_formula = ~ (1 + log10_body_size | sex)
)# Prepare labels
label_pos <- dat_organ %>%
group_by(sex) %>%
summarise(
x_lab = 1.1,
y_lab = 2.2,
.groups = "drop"
)
# Build the label text
slopes_labels <- coef_sex %>%
left_join(label_pos, by = "sex") %>%
mutate(
label = sprintf("β = %.2f [%.2f, %.2f]", slope, slope_low, slope_high)
)
# Plot points and predicted lines with credible intervals
ggplot() +
geom_point(
data = dat_organ,
aes(x = log10_body_size, y = log10_organ_size),
shape = 21, colour = "black", fill = "grey",
alpha = 0.5, size = 3
) +
stat_lineribbon(
data = draws_by_sex,
aes(x = log10_body_size, y = .epred, fill = stat(.width)),
alpha = 0.5, colour = NA
) +
geom_text(
data = slopes_labels,
aes(x = 1.1, y = 2.2, label = label),
hjust = 0, vjust = 1, size = 4, font = 2
) +
scale_fill_distiller(palette = "Purples", direction = 1) +
labs(
x = expression(log[10]*" Body size"),
y = expression(log[10]*" Small intestine size"),
title = "Small intestine size ~ Body size + (1 + Body size | Sex) + (1 | Phylogeny)"
) +
facet_wrap(vars(sex), nrow = 1) +
theme_bw() +
theme(
plot.title = element_text(size = 14),
strip.text = element_text(size = 12),
axis.title.x = element_text(size = 14),
axis.title.y = element_text(size = 14),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12),
legend.position = "none"
)loo_simple <- loo(model_simple_small_intestine)
loo_phylo <- loo(model_phylo_small_intestine)
loo_compare(loo_simple, loo_phylo) elpd_diff se_diff
model_phylo_small_intestine 0.0 0.0
model_simple_small_intestine -18.5 1.4
In this comparison, the model that accounts for phylogeny is clearly preferable to the model that treats species as independent observational units. The phylogenetic model (model_phylo_small intestine) is used as the reference with elpd_diff = 0, whereas the simpler model (model_simple_small intestine) has an expected log predictive density (ELPD) that is about 18.7 units lower, with an associated standard error of 1.4 In the context of leave‑one‑out cross‑validation, differences exceeding roughly four times their standard error are typically regarded as strong evidence. Here, 18.7/1.4≈13, indicating a very large difference relative to its uncertainty, and thus providing strong support for the phylogenetic model.
# Filter organ of interest
dat_organ <- dat %>%
group_by(organ_grouped, sex) %>%
filter(n_distinct(species) >= 3) %>%
ungroup() %>%
filter(organ_grouped == "large intestine")
# Check for database/tree species mismatches
setdiff(tree$tip.label, dat_organ$phylo) # Tree species absent in database [1] "Rattus_norvegicus" "Chlorocebus_sabaeus"
[3] "Macaca_fascicularis" "Saimiri_sciureus_sciureus"
[5] "Pteropus_scapulatus" "Pteropus_poliocephalus"
[7] "Pteropus_alecto" "Ficedula_hypoleuca"
[9] "Plectrophenax_nivalis" "Pica_pica"
[11] "Tringa_glareola" "Gavia_arctica"
[13] "Lagopus_muta" "Lagopus_lagopus"
[15] "Tetrao_urogallus" "Gallus_gallus"
[17] "Coturnix_japonica" "Numida_meleagris"
[19] "Aythya_fuligula" "Anas_acuta"
[21] "Melanitta_nigra" "Phrynocephalus_vlangalii"
[23] "Oreochromis_niloticus" "Poecilia_reticulata"
[25] "Anguilla_anguilla"
setdiff(dat_organ$phylo, tree$tip.label) # Database species absent in treecharacter(0)
# Exclude mismatched species for consistency
tree_organ <- keep.tip(tree, intersect(tree$tip.label, dat_organ$phylo))
dat_organ <- dat_organ[dat_organ$phylo %in% tree_organ$tip.label, ]
# Re-check for mismatches after pruning
setdiff(tree_organ$tip.label, dat_organ$phylo)character(0)
setdiff(dat_organ$phylo, tree_organ$tip.label)character(0)
# Calculate branch lengths using Grafen's method
tree_organ <- compute.brlen(tree_organ, method = "Grafen")
is.ultrametric(tree_organ)[1] TRUE
# Compute phylogenetic covariance matrix
A_organ <- ape::vcv.phylo(tree_organ)model_simple_large_intestine <- brm(
log10_organ_size ~
1 +
log10_body_size +
(1 + log10_body_size | sex),
data = dat_organ,
family = gaussian(),
prior = c(
prior(normal(0.75, 0.5), class = "b", coef = "log10_body_size"),
prior(normal(0, 2), class = "Intercept"),
prior(student_t(3, 0, 0.5), class = "sd"),
prior(student_t(3, 0, 1), class = "sigma")
),
sample_prior = TRUE,
chains = 4,
cores = 4,
threads = threading(2),
iter = 4000,
warmup = 2000,
control = list(adapt_delta = 0.95),
save_pars = save_pars(all = TRUE),
silent = TRUE, refresh = 0
)Running MCMC with 4 parallel chains, with 2 thread(s) per chain...
Chain 1 finished in 1.0 seconds.
Chain 4 finished in 1.0 seconds.
Chain 2 finished in 1.2 seconds.
Chain 3 finished in 1.1 seconds.
All 4 chains finished successfully.
Mean chain execution time: 1.1 seconds.
Total execution time: 1.3 seconds.
pp_check(model_simple_large_intestine)summary(model_simple_large_intestine) Family: gaussian
Links: mu = identity; sigma = identity
Formula: log10_organ_size ~ 1 + log10_body_size + (1 + log10_body_size | sex)
Data: dat_organ (Number of observations: 14)
Draws: 4 chains, each with iter = 4000; warmup = 2000; thin = 1;
total post-warmup draws = 8000
Multilevel Hyperparameters:
~sex (Number of levels: 2)
Estimate Est.Error l-95% CI u-95% CI Rhat
sd(Intercept) 0.28 0.29 0.01 1.03 1.00
sd(log10_body_size) 0.19 0.19 0.00 0.71 1.00
cor(Intercept,log10_body_size) -0.08 0.59 -0.97 0.95 1.00
Bulk_ESS Tail_ESS
sd(Intercept) 2600 2557
sd(log10_body_size) 1931 2672
cor(Intercept,log10_body_size) 3784 3765
Regression Coefficients:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
Intercept -1.78 0.28 -2.34 -1.20 1.00 2905 2682
log10_body_size 0.68 0.16 0.35 1.03 1.00 1867 1468
Further Distributional Parameters:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sigma 0.19 0.05 0.13 0.31 1.00 4452 4024
Draws were sampled using sample(hmc). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).
I will next extract, for each sex, the estimated slope and intercept, together with the 95% credible intervals.
# Extract combined coefficients (fixed + random)
coef_sex <-
coef(model_simple_large_intestine)$sex %>%
as_tibble(rownames = "sex") %>%
select(sex, matches("Estimate|Q2.5|Q97.5")) %>%
transmute(
sex,
slope = Estimate.log10_body_size,
slope_low = Q2.5.log10_body_size,
slope_high = Q97.5.log10_body_size,
)
coef_sex# A tibble: 2 × 4
sex slope slope_low slope_high
<chr> <dbl> <dbl> <dbl>
1 female 0.675 0.498 0.860
2 male 0.662 0.476 0.844
Lets now visualise these estimates
# Create new data with sequences for each sex
new_data <- dat_organ %>%
group_by(sex) %>%
summarise(
min_body = min(log10_body_size),
max_body = max(log10_body_size),
.groups = "drop"
) %>%
rowwise() %>%
mutate(
log10_body_size = list(seq(min_body, max_body, length.out = 100))
) %>%
unnest(log10_body_size) %>%
select(sex, log10_body_size)
# Generate posterior predicted draws
draws_by_sex <- epred_draws(
model_simple_large_intestine,
newdata = new_data,
re_formula = ~ (1 + log10_body_size | sex)
)# Prepare labels
label_pos <- dat_organ %>%
group_by(sex) %>%
summarise(
x_lab = 1.1,
y_lab = 2.2,
.groups = "drop"
)
# Build the label text
slopes_labels <- coef_sex %>%
left_join(label_pos, by = "sex") %>%
mutate(
label = sprintf("β = %.2f [%.2f, %.2f]", slope, slope_low, slope_high)
)
# Plot points and predicted lines with credible intervals
ggplot() +
geom_point(
data = dat_organ,
aes(x = log10_body_size, y = log10_organ_size),
shape = 21, colour = "black", fill = "grey",
alpha = 0.5, size = 3
) +
stat_lineribbon(
data = draws_by_sex,
aes(x = log10_body_size, y = .epred, fill = stat(.width)),
alpha = 0.5, colour = NA
) +
geom_text(
data = slopes_labels,
aes(x = 1.1, y = 2.2, label = label),
hjust = 0, vjust = 1, size = 4, font = 2
) +
scale_fill_distiller(palette = "Purples", direction = 1) +
labs(
x = expression(log[10]*" Body size"),
y = expression(log[10]*" Large intestine size"),
title = "Large intestine size ~ Body size + (1 + Body size | Sex)"
) +
facet_wrap(vars(sex), nrow = 1) +
theme_bw() +
theme(
plot.title = element_text(size = 14),
strip.text = element_text(size = 12),
axis.title.x = element_text(size = 14),
axis.title.y = element_text(size = 14),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12),
legend.position = "none"
)model_phylo_large_intestine <- brm(
log10_organ_size ~
1 +
log10_body_size +
(1 + log10_body_size | sex) +
(1 | gr(phylo, cov = A_organ)),
data = dat_organ,
data2 = list(A_organ = A_organ),
family = gaussian(),
prior = c(
prior(normal(0.75, 0.5), class = "b", coef = "log10_body_size"),
prior(normal(0, 2), class = "Intercept"),
prior(student_t(3, 0, 0.5), class = "sd"),
prior(student_t(3, 0, 1), class = "sigma")
),
sample_prior = TRUE,
chains = 4,
cores = 4,
threads = threading(2),
iter = 4000,
warmup = 2000,
control = list(adapt_delta = 0.95),
save_pars = save_pars(all = TRUE),
silent = TRUE, refresh = 0
)Running MCMC with 4 parallel chains, with 2 thread(s) per chain...
Chain 1 finished in 1.6 seconds.
Chain 3 finished in 1.5 seconds.
Chain 4 finished in 1.9 seconds.
Chain 2 finished in 2.1 seconds.
All 4 chains finished successfully.
Mean chain execution time: 1.8 seconds.
Total execution time: 2.2 seconds.
pp_check(model_phylo_large_intestine)summary(model_phylo_large_intestine) Family: gaussian
Links: mu = identity; sigma = identity
Formula: log10_organ_size ~ 1 + log10_body_size + (1 + log10_body_size | sex) + (1 | gr(phylo, cov = A_organ))
Data: dat_organ (Number of observations: 14)
Draws: 4 chains, each with iter = 4000; warmup = 2000; thin = 1;
total post-warmup draws = 8000
Multilevel Hyperparameters:
~phylo (Number of levels: 4)
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sd(Intercept) 0.18 0.18 0.01 0.65 1.00 2585 2088
~sex (Number of levels: 2)
Estimate Est.Error l-95% CI u-95% CI Rhat
sd(Intercept) 0.28 0.30 0.01 1.08 1.00
sd(log10_body_size) 0.19 0.22 0.00 0.76 1.00
cor(Intercept,log10_body_size) -0.07 0.60 -0.97 0.95 1.00
Bulk_ESS Tail_ESS
sd(Intercept) 3889 3748
sd(log10_body_size) 2539 3307
cor(Intercept,log10_body_size) 4591 4574
Regression Coefficients:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
Intercept -1.77 0.49 -2.80 -0.73 1.00 3178 2890
log10_body_size 0.68 0.21 0.25 1.14 1.00 2787 2522
Further Distributional Parameters:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sigma 0.20 0.05 0.12 0.32 1.00 5958 4837
Draws were sampled using sample(hmc). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).
hyp <- paste(
"sd_phylo__Intercept^2 /",
"(sd_phylo__Intercept^2 + sd_sex__Intercept^2 + sigma^2) = 0"
)
(hyp <- hypothesis(model_phylo_large_intestine, hyp, class = NULL))Hypothesis Tests for class :
Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio
1 (sd_phylo__Interc... = 0 0.25 0.27 0 0.88 0.88
Post.Prob Star
1 0.47 *
---
'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
'*': For one-sided hypotheses, the posterior probability exceeds 95%;
for two-sided hypotheses, the value tested against lies outside the 95%-CI.
Posterior probabilities of point hypotheses assume equal prior probabilities.
lambda_large_intestine <- hyp$hypothesis$EstimateI will next extract, for each sex, the estimated slope and intercept, together with the 95% credible intervals.
# Extract combined coefficients (fixed + random)
coef_sex <-
coef(model_phylo_large_intestine)$sex %>%
as_tibble(rownames = "sex") %>%
select(sex, matches("Estimate|Q2.5|Q97.5")) %>%
transmute(
sex,
slope = Estimate.log10_body_size,
slope_low = Q2.5.log10_body_size,
slope_high = Q97.5.log10_body_size,
)
coef_sex# A tibble: 2 × 4
sex slope slope_low slope_high
<chr> <dbl> <dbl> <dbl>
1 female 0.669 0.306 1.03
2 male 0.659 0.297 1.03
Lets now visualise these estimates
# Create new data with sequences for each sex
new_data <- dat_organ %>%
group_by(sex) %>%
summarise(
min_body = min(log10_body_size),
max_body = max(log10_body_size),
.groups = "drop"
) %>%
rowwise() %>%
mutate(
log10_body_size = list(seq(min_body, max_body, length.out = 100))
) %>%
unnest(log10_body_size) %>%
select(sex, log10_body_size)
# Generate posterior predicted draws
draws_by_sex <- epred_draws(
model_phylo_large_intestine,
newdata = new_data,
re_formula = ~ (1 + log10_body_size | sex)
)# Prepare labels
label_pos <- dat_organ %>%
group_by(sex) %>%
summarise(
x_lab = 1.1,
y_lab = 2.2,
.groups = "drop"
)
# Build the label text
slopes_labels <- coef_sex %>%
left_join(label_pos, by = "sex") %>%
mutate(
label = sprintf("β = %.2f [%.2f, %.2f]", slope, slope_low, slope_high)
)
# Plot points and predicted lines with credible intervals
ggplot() +
geom_point(
data = dat_organ,
aes(x = log10_body_size, y = log10_organ_size),
shape = 21, colour = "black", fill = "grey",
alpha = 0.5, size = 3
) +
stat_lineribbon(
data = draws_by_sex,
aes(x = log10_body_size, y = .epred, fill = stat(.width)),
alpha = 0.5, colour = NA
) +
geom_text(
data = slopes_labels,
aes(x = 1.1, y = 2.2, label = label),
hjust = 0, vjust = 1, size = 4, font = 2
) +
scale_fill_distiller(palette = "Purples", direction = 1) +
labs(
x = expression(log[10]*" Body size"),
y = expression(log[10]*" Large intestine size"),
title = "Large intestine size ~ Body size + (1 + Body size | Sex) + (1 | Phylogeny)"
) +
facet_wrap(vars(sex), nrow = 1) +
theme_bw() +
theme(
plot.title = element_text(size = 14),
strip.text = element_text(size = 12),
axis.title.x = element_text(size = 14),
axis.title.y = element_text(size = 14),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12),
legend.position = "none"
)loo_simple <- loo(model_simple_large_intestine)
loo_phylo <- loo(model_phylo_large_intestine)
loo_compare(loo_simple, loo_phylo) elpd_diff se_diff
model_simple_large_intestine 0.0 0.0
model_phylo_large_intestine -0.4 0.2
In this comparison, the model that accounts for phylogeny is clearly preferable to the model that treats species as independent observational units. The phylogenetic model (model_phylo_large intestine) is used as the reference with elpd_diff = 0, whereas the simpler model (model_simple_large intestine) has an expected log predictive density (ELPD) that is about 0.2 units lower, with an associated standard error of 0.2 In the context of leave‑one‑out cross‑validation, differences exceeding roughly four times their standard error are typically regarded as strong evidence. Here, 0.2/0.2≈1, indicating both models are similarly good.
# Filter organ of interest
dat_organ <- dat %>%
group_by(organ_grouped, sex) %>%
filter(n_distinct(species) >= 1) %>%
ungroup() %>%
filter(organ_grouped == "muscle")model_simple_muscle <- brm(
log10_organ_size ~
1 +
log10_body_size +
(1 + log10_body_size | sex),
data = dat_organ,
family = gaussian(),
prior = c(
prior(normal(0.75, 0.5), class = "b", coef = "log10_body_size"),
prior(normal(0, 2), class = "Intercept"),
prior(student_t(3, 0, 0.5), class = "sd"),
prior(student_t(3, 0, 1), class = "sigma")
),
sample_prior = TRUE,
chains = 4,
cores = 4,
threads = threading(2),
iter = 4000,
warmup = 2000,
control = list(adapt_delta = 0.95),
save_pars = save_pars(all = TRUE),
silent = TRUE, refresh = 0
)Running MCMC with 4 parallel chains, with 2 thread(s) per chain...
Chain 1 finished in 12.2 seconds.
Chain 2 finished in 12.5 seconds.
Chain 3 finished in 12.5 seconds.
Chain 4 finished in 12.5 seconds.
All 4 chains finished successfully.
Mean chain execution time: 12.4 seconds.
Total execution time: 12.8 seconds.
pp_check(model_simple_muscle)summary(model_simple_muscle) Family: gaussian
Links: mu = identity; sigma = identity
Formula: log10_organ_size ~ 1 + log10_body_size + (1 + log10_body_size | sex)
Data: dat_organ (Number of observations: 22)
Draws: 4 chains, each with iter = 4000; warmup = 2000; thin = 1;
total post-warmup draws = 8000
Multilevel Hyperparameters:
~sex (Number of levels: 2)
Estimate Est.Error l-95% CI u-95% CI Rhat
sd(Intercept) 0.41 0.40 0.01 1.44 1.00
sd(log10_body_size) 0.19 0.19 0.00 0.72 1.01
cor(Intercept,log10_body_size) -0.19 0.58 -0.98 0.92 1.00
Bulk_ESS Tail_ESS
sd(Intercept) 1569 2187
sd(log10_body_size) 1100 1916
cor(Intercept,log10_body_size) 2390 2775
Regression Coefficients:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
Intercept 0.62 0.52 -0.48 1.61 1.00 3282 2753
log10_body_size 0.55 0.18 0.18 0.91 1.00 1756 2063
Further Distributional Parameters:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sigma 0.02 0.00 0.01 0.03 1.00 3186 4282
Draws were sampled using sample(hmc). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).
I will next extract, for each sex, the estimated slope and intercept, together with the 95% credible intervals.
# Extract combined coefficients (fixed + random)
coef_sex <-
coef(model_simple_muscle)$sex %>%
as_tibble(rownames = "sex") %>%
select(sex, matches("Estimate|Q2.5|Q97.5")) %>%
transmute(
sex,
slope = Estimate.log10_body_size,
slope_low = Q2.5.log10_body_size,
slope_high = Q97.5.log10_body_size,
)
coef_sex# A tibble: 2 × 4
sex slope slope_low slope_high
<chr> <dbl> <dbl> <dbl>
1 female 0.597 0.298 0.939
2 male 0.520 0.232 0.803
Lets now visualise these estimates
# Create new data with sequences for each sex
new_data <- dat_organ %>%
group_by(sex) %>%
summarise(
min_body = min(log10_body_size),
max_body = max(log10_body_size),
.groups = "drop"
) %>%
rowwise() %>%
mutate(
log10_body_size = list(seq(min_body, max_body, length.out = 100))
) %>%
unnest(log10_body_size) %>%
select(sex, log10_body_size)
# Generate posterior predicted draws
draws_by_sex <- epred_draws(
model_simple_muscle,
newdata = new_data,
re_formula = ~ (1 + log10_body_size | sex)
)# Prepare labels
label_pos <- dat_organ %>%
group_by(sex) %>%
summarise(
x_lab = 2.91,
y_lab = 2.5,
.groups = "drop"
)
# Build the label text
slopes_labels <- coef_sex %>%
left_join(label_pos, by = "sex") %>%
mutate(
label = sprintf("β = %.2f [%.2f, %.2f]", slope, slope_low, slope_high)
)
# Plot points and predicted lines with credible intervals
ggplot() +
geom_point(
data = dat_organ,
aes(x = log10_body_size, y = log10_organ_size),
shape = 21, colour = "black", fill = "grey",
alpha = 0.5, size = 3
) +
stat_lineribbon(
data = draws_by_sex,
aes(x = log10_body_size, y = .epred, fill = stat(.width)),
alpha = 0.5, colour = NA
) +
geom_text(
data = slopes_labels,
aes(x = 2.91, y = 2.5, label = label),
hjust = 0, vjust = 1, size = 4, font = 2
) +
scale_x_continuous(limits = c(2.9, 3.2)) +
scale_y_continuous(limits = c(2.2, 2.5)) +
scale_fill_distiller(palette = "Purples", direction = 1) +
labs(
x = expression(log[10]*" Body size"),
y = expression(log[10]*" Muscle size"),
title = "Muscle size ~ Body size + (1 + Body size | Sex)"
) +
facet_wrap(vars(sex), nrow = 1) +
theme_bw() +
theme(
plot.title = element_text(size = 14),
strip.text = element_text(size = 12),
axis.title.x = element_text(size = 14),
axis.title.y = element_text(size = 14),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12),
legend.position = "none"
)# Filter organ of interest
dat_organ <- dat %>%
group_by(organ_grouped, sex) %>%
filter(n_distinct(species) >= 5) %>%
ungroup() %>%
filter(organ_grouped == "lungs")
# Check for database/tree species mismatches
setdiff(tree$tip.label, dat_organ$phylo) # Tree species absent in database [1] "Pteropus_scapulatus" "Pteropus_poliocephalus" "Pteropus_alecto"
[4] "Ficedula_hypoleuca" "Plectrophenax_nivalis" "Pica_pica"
[7] "Tringa_glareola" "Gavia_arctica" "Lagopus_muta"
[10] "Lagopus_lagopus" "Tetrao_urogallus" "Gallus_gallus"
[13] "Coturnix_japonica" "Numida_meleagris" "Aythya_fuligula"
[16] "Anas_platyrhynchos" "Anas_acuta" "Melanitta_nigra"
[19] "Oreochromis_niloticus" "Poecilia_reticulata" "Anguilla_anguilla"
setdiff(dat_organ$phylo, tree$tip.label) # Database species absent in treecharacter(0)
# Exclude mismatched species for consistency
tree_organ <- keep.tip(tree, intersect(tree$tip.label, dat_organ$phylo))
dat_organ <- dat_organ[dat_organ$phylo %in% tree_organ$tip.label, ]
# Re-check for mismatches after pruning
setdiff(tree_organ$tip.label, dat_organ$phylo)character(0)
setdiff(dat_organ$phylo, tree_organ$tip.label)character(0)
# Calculate branch lengths using Grafen's method
tree_organ <- compute.brlen(tree_organ, method = "Grafen")
is.ultrametric(tree_organ)[1] TRUE
# Compute phylogenetic covariance matrix
A_organ <- ape::vcv.phylo(tree_organ)model_simple_lungs <- brm(
log10_organ_size ~
1 +
log10_body_size +
(1 + log10_body_size | sex),
data = dat_organ,
family = gaussian(),
prior = c(
prior(normal(0.75, 0.5), class = "b", coef = "log10_body_size"),
prior(normal(0, 2), class = "Intercept"),
prior(student_t(3, 0, 0.5), class = "sd"),
prior(student_t(3, 0, 1), class = "sigma")
),
sample_prior = TRUE,
chains = 4,
cores = 4,
threads = threading(2),
iter = 4000,
warmup = 2000,
control = list(adapt_delta = 0.95),
save_pars = save_pars(all = TRUE),
silent = TRUE, refresh = 0
)Running MCMC with 4 parallel chains, with 2 thread(s) per chain...
Chain 4 finished in 1.3 seconds.
Chain 1 finished in 1.6 seconds.
Chain 3 finished in 1.6 seconds.
Chain 2 finished in 2.0 seconds.
All 4 chains finished successfully.
Mean chain execution time: 1.6 seconds.
Total execution time: 2.1 seconds.
pp_check(model_simple_lungs)summary(model_simple_lungs) Family: gaussian
Links: mu = identity; sigma = identity
Formula: log10_organ_size ~ 1 + log10_body_size + (1 + log10_body_size | sex)
Data: dat_organ (Number of observations: 38)
Draws: 4 chains, each with iter = 4000; warmup = 2000; thin = 1;
total post-warmup draws = 8000
Multilevel Hyperparameters:
~sex (Number of levels: 2)
Estimate Est.Error l-95% CI u-95% CI Rhat
sd(Intercept) 0.27 0.28 0.01 1.00 1.00
sd(log10_body_size) 0.18 0.20 0.00 0.74 1.00
cor(Intercept,log10_body_size) -0.07 0.60 -0.97 0.95 1.00
Bulk_ESS Tail_ESS
sd(Intercept) 3123 3155
sd(log10_body_size) 1328 733
cor(Intercept,log10_body_size) 3700 3957
Regression Coefficients:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
Intercept -2.54 0.27 -3.10 -1.96 1.00 3664 3759
log10_body_size 1.08 0.14 0.72 1.35 1.00 1236 468
Further Distributional Parameters:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sigma 0.27 0.03 0.22 0.35 1.00 5138 5015
Draws were sampled using sample(hmc). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).
I will next extract, for each sex, the estimated slope and intercept, together with the 95% credible intervals.
# Extract combined coefficients (fixed + random)
coef_sex <-
coef(model_simple_lungs)$sex %>%
as_tibble(rownames = "sex") %>%
select(sex, matches("Estimate|Q2.5|Q97.5")) %>%
transmute(
sex,
slope = Estimate.log10_body_size,
slope_low = Q2.5.log10_body_size,
slope_high = Q97.5.log10_body_size,
)
coef_sex# A tibble: 2 × 4
sex slope slope_low slope_high
<chr> <dbl> <dbl> <dbl>
1 female 1.11 0.975 1.25
2 male 1.09 0.958 1.22
Lets now visualise these estimates
# Create new data with sequences for each sex
new_data <- dat_organ %>%
group_by(sex) %>%
summarise(
min_body = min(log10_body_size),
max_body = max(log10_body_size),
.groups = "drop"
) %>%
rowwise() %>%
mutate(
log10_body_size = list(seq(min_body, max_body, length.out = 100))
) %>%
unnest(log10_body_size) %>%
select(sex, log10_body_size)
# Generate posterior predicted draws
draws_by_sex <- epred_draws(
model_simple_lungs,
newdata = new_data,
re_formula = ~ (1 + log10_body_size | sex)
)# Prepare labels
label_pos <- dat_organ %>%
group_by(sex) %>%
summarise(
x_lab = 1.1,
y_lab = 2.2,
.groups = "drop"
)
# Build the label text
slopes_labels <- coef_sex %>%
left_join(label_pos, by = "sex") %>%
mutate(
label = sprintf("β = %.2f [%.2f, %.2f]", slope, slope_low, slope_high)
)
# Plot points and predicted lines with credible intervals
ggplot() +
geom_point(
data = dat_organ,
aes(x = log10_body_size, y = log10_organ_size),
shape = 21, colour = "black", fill = "grey",
alpha = 0.5, size = 3
) +
stat_lineribbon(
data = draws_by_sex,
aes(x = log10_body_size, y = .epred, fill = stat(.width)),
alpha = 0.5, colour = NA
) +
geom_text(
data = slopes_labels,
aes(x = 1.1, y = 2.2, label = label),
hjust = 0, vjust = 1, size = 4, font = 2
) +
scale_fill_distiller(palette = "Purples", direction = 1) +
labs(
x = expression(log[10]*" Body size"),
y = expression(log[10]*" Lungs size"),
title = "Lungs size ~ Body size + (1 + Body size | Sex)"
) +
facet_wrap(vars(sex), nrow = 1) +
theme_bw() +
theme(
plot.title = element_text(size = 14),
strip.text = element_text(size = 12),
axis.title.x = element_text(size = 14),
axis.title.y = element_text(size = 14),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12),
legend.position = "none"
)model_phylo_lungs <- brm(
log10_organ_size ~
1 +
log10_body_size +
(1 + log10_body_size | sex) +
(1 | gr(phylo, cov = A_organ)),
data = dat_organ,
data2 = list(A_organ = A_organ),
family = gaussian(),
prior = c(
prior(normal(0.75, 0.5), class = "b", coef = "log10_body_size"),
prior(normal(0, 2), class = "Intercept"),
prior(student_t(3, 0, 0.5), class = "sd"),
prior(student_t(3, 0, 1), class = "sigma")
),
sample_prior = TRUE,
chains = 4,
cores = 4,
threads = threading(2),
iter = 4000,
warmup = 2000,
control = list(adapt_delta = 0.95),
save_pars = save_pars(all = TRUE),
silent = TRUE, refresh = 0
)Running MCMC with 4 parallel chains, with 2 thread(s) per chain...
Chain 2 finished in 5.0 seconds.
Chain 4 finished in 5.1 seconds.
Chain 3 finished in 5.2 seconds.
Chain 1 finished in 5.9 seconds.
All 4 chains finished successfully.
Mean chain execution time: 5.3 seconds.
Total execution time: 6.0 seconds.
pp_check(model_phylo_lungs)summary(model_phylo_lungs) Family: gaussian
Links: mu = identity; sigma = identity
Formula: log10_organ_size ~ 1 + log10_body_size + (1 + log10_body_size | sex) + (1 | gr(phylo, cov = A_organ))
Data: dat_organ (Number of observations: 38)
Draws: 4 chains, each with iter = 4000; warmup = 2000; thin = 1;
total post-warmup draws = 8000
Multilevel Hyperparameters:
~phylo (Number of levels: 8)
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sd(Intercept) 0.40 0.17 0.18 0.84 1.00 1202 2795
~sex (Number of levels: 2)
Estimate Est.Error l-95% CI u-95% CI Rhat
sd(Intercept) 0.21 0.26 0.00 0.88 1.06
sd(log10_body_size) 0.18 0.22 0.00 0.77 1.07
cor(Intercept,log10_body_size) -0.04 0.61 -0.98 0.95 1.01
Bulk_ESS Tail_ESS
sd(Intercept) 45 20
sd(log10_body_size) 44 37
cor(Intercept,log10_body_size) 1041 4275
Regression Coefficients:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
Intercept -1.88 0.49 -2.69 -0.71 1.03 1582 1984
log10_body_size 0.72 0.21 0.32 1.09 1.05 60 524
Further Distributional Parameters:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sigma 0.10 0.01 0.07 0.13 1.03 2787 4483
Draws were sampled using sample(hmc). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).
hyp <- paste(
"sd_phylo__Intercept^2 /",
"(sd_phylo__Intercept^2 + sd_sex__Intercept^2 + sigma^2) = 0"
)
(hyp <- hypothesis(model_phylo_lungs, hyp, class = NULL))Hypothesis Tests for class :
Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio
1 (sd_phylo__Interc... = 0 0.72 0.25 0.11 0.98 0.02
Post.Prob Star
1 0.02 *
---
'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
'*': For one-sided hypotheses, the posterior probability exceeds 95%;
for two-sided hypotheses, the value tested against lies outside the 95%-CI.
Posterior probabilities of point hypotheses assume equal prior probabilities.
lambda_lungs <- hyp$hypothesis$EstimateI will next extract, for each sex, the estimated slope and intercept, together with the 95% credible intervals.
# Extract combined coefficients (fixed + random)
coef_sex <-
coef(model_phylo_lungs)$sex %>%
as_tibble(rownames = "sex") %>%
select(sex, matches("Estimate|Q2.5|Q97.5")) %>%
transmute(
sex,
slope = Estimate.log10_body_size,
slope_low = Q2.5.log10_body_size,
slope_high = Q97.5.log10_body_size,
)
coef_sex# A tibble: 2 × 4
sex slope slope_low slope_high
<chr> <dbl> <dbl> <dbl>
1 female 0.751 0.347 1.05
2 male 0.742 0.359 1.01
Lets now visualise these estimates
# Create new data with sequences for each sex
new_data <- dat_organ %>%
group_by(sex) %>%
summarise(
min_body = min(log10_body_size),
max_body = max(log10_body_size),
.groups = "drop"
) %>%
rowwise() %>%
mutate(
log10_body_size = list(seq(min_body, max_body, length.out = 100))
) %>%
unnest(log10_body_size) %>%
select(sex, log10_body_size)
# Generate posterior predicted draws
draws_by_sex <- epred_draws(
model_phylo_lungs,
newdata = new_data,
re_formula = ~ (1 + log10_body_size | sex)
)# Prepare labels
label_pos <- dat_organ %>%
group_by(sex) %>%
summarise(
x_lab = 1.1,
y_lab = 2.2,
.groups = "drop"
)
# Build the label text
slopes_labels <- coef_sex %>%
left_join(label_pos, by = "sex") %>%
mutate(
label = sprintf("β = %.2f [%.2f, %.2f]", slope, slope_low, slope_high)
)
# Plot points and predicted lines with credible intervals
ggplot() +
geom_point(
data = dat_organ,
aes(x = log10_body_size, y = log10_organ_size),
shape = 21, colour = "black", fill = "grey",
alpha = 0.5, size = 3
) +
stat_lineribbon(
data = draws_by_sex,
aes(x = log10_body_size, y = .epred, fill = stat(.width)),
alpha = 0.5, colour = NA
) +
geom_text(
data = slopes_labels,
aes(x = 1.1, y = 2.2, label = label),
hjust = 0, vjust = 1, size = 4, font = 2
) +
scale_fill_distiller(palette = "Purples", direction = 1) +
labs(
x = expression(log[10]*" Body size"),
y = expression(log[10]*" Lungs size"),
title = "Lungs size ~ Body size + (1 + Body size | Sex) + (1 | Phylogeny)"
) +
facet_wrap(vars(sex), nrow = 1) +
theme_bw() +
theme(
plot.title = element_text(size = 14),
strip.text = element_text(size = 12),
axis.title.x = element_text(size = 14),
axis.title.y = element_text(size = 14),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12),
legend.position = "none"
)loo_simple <- loo(model_simple_lungs)
loo_phylo <- loo(model_phylo_lungs)
loo_compare(loo_simple, loo_phylo) elpd_diff se_diff
model_phylo_lungs 0.0 0.0
model_simple_lungs -35.6 4.8
In this comparison, the model that accounts for phylogeny is clearly preferable to the model that treats species as independent observational units. The phylogenetic model (model_phylo_lungs) is used as the reference with elpd_diff = 0, whereas the simpler model (model_simple_lungs) has an expected log predictive density (ELPD) that is about 35.3 units lower, with an associated standard error of 4.9. In the context of leave‑one‑out cross‑validation, differences exceeding roughly four to 5.5 times their standard error are typically regarded as strong evidence. Here, 35.3/4.9≈7.2, indicating a very large difference relative to its uncertainty, and thus providing strong support for the phylogenetic model.
# Filter organ of interest
dat_organ <- dat %>%
group_by(organ_grouped, sex) %>%
filter(n_distinct(species) >= 5) %>%
ungroup() %>%
filter(organ_grouped == "adrenal glands")
# Check for database/tree species mismatches
setdiff(tree$tip.label, dat_organ$phylo) # Tree species absent in database [1] "Meriones_unguiculatus" "Lasiopodomys_brandtii"
[3] "Pteropus_scapulatus" "Pteropus_poliocephalus"
[5] "Pteropus_alecto" "Ficedula_hypoleuca"
[7] "Plectrophenax_nivalis" "Pica_pica"
[9] "Tringa_glareola" "Gavia_arctica"
[11] "Lagopus_muta" "Lagopus_lagopus"
[13] "Tetrao_urogallus" "Gallus_gallus"
[15] "Numida_meleagris" "Aythya_fuligula"
[17] "Anas_acuta" "Melanitta_nigra"
[19] "Phrynocephalus_vlangalii" "Oreochromis_niloticus"
[21] "Poecilia_reticulata" "Anguilla_anguilla"
setdiff(dat_organ$phylo, tree$tip.label) # Database species absent in treecharacter(0)
# Exclude mismatched species for consistency
tree_organ <- keep.tip(tree, intersect(tree$tip.label, dat_organ$phylo))
dat_organ <- dat_organ[dat_organ$phylo %in% tree_organ$tip.label, ]
# Re-check for mismatches after pruning
setdiff(tree_organ$tip.label, dat_organ$phylo)character(0)
setdiff(dat_organ$phylo, tree_organ$tip.label)character(0)
# Calculate branch lengths using Grafen's method
tree_organ <- compute.brlen(tree_organ, method = "Grafen")
is.ultrametric(tree_organ)[1] TRUE
# Compute phylogenetic covariance matrix
A_organ <- ape::vcv.phylo(tree_organ)model_simple_adrenal_glands <- brm(
log10_organ_size ~
1 +
log10_body_size +
(1 + log10_body_size | sex),
data = dat_organ,
family = gaussian(),
prior = c(
prior(normal(0.75, 0.5), class = "b", coef = "log10_body_size"),
prior(normal(0, 2), class = "Intercept"),
prior(student_t(3, 0, 0.5), class = "sd"),
prior(student_t(3, 0, 1), class = "sigma")
),
sample_prior = TRUE,
chains = 4,
cores = 4,
threads = threading(2),
iter = 4000,
warmup = 2000,
control = list(adapt_delta = 0.95),
save_pars = save_pars(all = TRUE),
silent = TRUE, refresh = 0
)Running MCMC with 4 parallel chains, with 2 thread(s) per chain...
Chain 1 finished in 2.0 seconds.
Chain 4 finished in 2.3 seconds.
Chain 2 finished in 2.6 seconds.
Chain 3 finished in 2.9 seconds.
All 4 chains finished successfully.
Mean chain execution time: 2.4 seconds.
Total execution time: 3.1 seconds.
pp_check(model_simple_adrenal_glands)summary(model_simple_adrenal_glands) Family: gaussian
Links: mu = identity; sigma = identity
Formula: log10_organ_size ~ 1 + log10_body_size + (1 + log10_body_size | sex)
Data: dat_organ (Number of observations: 26)
Draws: 4 chains, each with iter = 4000; warmup = 2000; thin = 1;
total post-warmup draws = 8000
Multilevel Hyperparameters:
~sex (Number of levels: 2)
Estimate Est.Error l-95% CI u-95% CI Rhat
sd(Intercept) 0.30 0.33 0.01 1.15 1.00
sd(log10_body_size) 0.18 0.21 0.00 0.74 1.00
cor(Intercept,log10_body_size) -0.07 0.59 -0.97 0.94 1.00
Bulk_ESS Tail_ESS
sd(Intercept) 2723 2896
sd(log10_body_size) 1960 2359
cor(Intercept,log10_body_size) 4180 3796
Regression Coefficients:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
Intercept -3.09 0.31 -3.65 -2.36 1.00 2742 2712
log10_body_size 0.79 0.15 0.45 1.12 1.00 1712 1266
Further Distributional Parameters:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sigma 0.19 0.03 0.14 0.26 1.00 5217 4865
Draws were sampled using sample(hmc). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).
I will next extract, for each sex, the estimated slope and intercept, together with the 95% credible intervals.
# Extract combined coefficients (fixed + random)
coef_sex <-
coef(model_simple_adrenal_glands)$sex %>%
as_tibble(rownames = "sex") %>%
select(sex, matches("Estimate|Q2.5|Q97.5")) %>%
transmute(
sex,
slope = Estimate.log10_body_size,
slope_low = Q2.5.log10_body_size,
slope_high = Q97.5.log10_body_size,
)
coef_sex# A tibble: 2 × 4
sex slope slope_low slope_high
<chr> <dbl> <dbl> <dbl>
1 female 0.790 0.686 0.893
2 male 0.794 0.692 0.896
Lets now visualise these estimates
# Create new data with sequences for each sex
new_data <- dat_organ %>%
group_by(sex) %>%
summarise(
min_body = min(log10_body_size),
max_body = max(log10_body_size),
.groups = "drop"
) %>%
rowwise() %>%
mutate(
log10_body_size = list(seq(min_body, max_body, length.out = 100))
) %>%
unnest(log10_body_size) %>%
select(sex, log10_body_size)
# Generate posterior predicted draws
draws_by_sex <- epred_draws(
model_simple_adrenal_glands,
newdata = new_data,
re_formula = ~ (1 + log10_body_size | sex)
)# Prepare labels
label_pos <- dat_organ %>%
group_by(sex) %>%
summarise(
x_lab = 1.1,
y_lab = 0.5,
.groups = "drop"
)
# Build the label text
slopes_labels <- coef_sex %>%
left_join(label_pos, by = "sex") %>%
mutate(
label = sprintf("β = %.2f [%.2f, %.2f]", slope, slope_low, slope_high)
)
# Plot points and predicted lines with credible intervals
ggplot() +
geom_point(
data = dat_organ,
aes(x = log10_body_size, y = log10_organ_size),
shape = 21, colour = "black", fill = "grey",
alpha = 0.5, size = 3
) +
stat_lineribbon(
data = draws_by_sex,
aes(x = log10_body_size, y = .epred, fill = stat(.width)),
alpha = 0.5, colour = NA
) +
geom_text(
data = slopes_labels,
aes(x = 1.1, y = 0.5, label = label),
hjust = 0, vjust = 1, size = 4, font = 2
) +
scale_fill_distiller(palette = "Purples", direction = 1) +
labs(
x = expression(log[10]*" Body size"),
y = expression(log[10]*" Adrenal glands size"),
title = "Adrenal glands size ~ Body size + (1 + Body size | Sex)"
) +
facet_wrap(vars(sex), nrow = 1) +
theme_bw() +
theme(
plot.title = element_text(size = 14),
strip.text = element_text(size = 12),
axis.title.x = element_text(size = 14),
axis.title.y = element_text(size = 14),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12),
legend.position = "none"
)model_phylo_adrenal_glands <- brm(
log10_organ_size ~
1 +
log10_body_size +
(1 + log10_body_size | sex) +
(1 | gr(phylo, cov = A_organ)),
data = dat_organ,
data2 = list(A_organ = A_organ),
family = gaussian(),
prior = c(
prior(normal(0.75, 0.5), class = "b", coef = "log10_body_size"),
prior(normal(0, 2), class = "Intercept"),
prior(student_t(3, 0, 0.5), class = "sd"),
prior(student_t(3, 0, 1), class = "sigma")
),
sample_prior = TRUE,
chains = 4,
cores = 4,
threads = threading(2),
iter = 4000,
warmup = 2000,
control = list(adapt_delta = 0.95),
save_pars = save_pars(all = TRUE),
silent = TRUE, refresh = 0
)Running MCMC with 4 parallel chains, with 2 thread(s) per chain...
Chain 2 finished in 5.3 seconds.
Chain 4 finished in 6.8 seconds.
Chain 3 finished in 7.5 seconds.
Chain 1 finished in 7.7 seconds.
All 4 chains finished successfully.
Mean chain execution time: 6.8 seconds.
Total execution time: 7.7 seconds.
pp_check(model_phylo_adrenal_glands)summary(model_phylo_adrenal_glands) Family: gaussian
Links: mu = identity; sigma = identity
Formula: log10_organ_size ~ 1 + log10_body_size + (1 + log10_body_size | sex) + (1 | gr(phylo, cov = A_organ))
Data: dat_organ (Number of observations: 26)
Draws: 4 chains, each with iter = 4000; warmup = 2000; thin = 1;
total post-warmup draws = 8000
Multilevel Hyperparameters:
~phylo (Number of levels: 7)
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sd(Intercept) 0.42 0.17 0.19 0.82 1.00 1749 3089
~sex (Number of levels: 2)
Estimate Est.Error l-95% CI u-95% CI Rhat
sd(Intercept) 0.32 0.30 0.02 1.12 1.00
sd(log10_body_size) 0.15 0.18 0.00 0.64 1.00
cor(Intercept,log10_body_size) -0.10 0.59 -0.97 0.93 1.00
Bulk_ESS Tail_ESS
sd(Intercept) 3346 2753
sd(log10_body_size) 2414 4574
cor(Intercept,log10_body_size) 5183 4918
Regression Coefficients:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
Intercept -2.60 0.48 -3.48 -1.56 1.00 2718 4183
log10_body_size 0.62 0.16 0.31 0.95 1.00 2246 2252
Further Distributional Parameters:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sigma 0.09 0.02 0.06 0.13 1.00 2714 4297
Draws were sampled using sample(hmc). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).
hyp <- paste(
"sd_phylo__Intercept^2 /",
"(sd_phylo__Intercept^2 + sd_sex__Intercept^2 + sigma^2) = 0"
)
(hyp <- hypothesis(model_phylo_adrenal_glands, hyp, class = NULL))Hypothesis Tests for class :
Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio
1 (sd_phylo__Interc... = 0 0.63 0.27 0.08 0.97 0.04
Post.Prob Star
1 0.04 *
---
'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
'*': For one-sided hypotheses, the posterior probability exceeds 95%;
for two-sided hypotheses, the value tested against lies outside the 95%-CI.
Posterior probabilities of point hypotheses assume equal prior probabilities.
lambda_adrenal_glands <- hyp$hypothesis$EstimateI will next extract, for each sex, the estimated slope and intercept, together with the 95% credible intervals.
# Extract combined coefficients (fixed + random)
coef_sex <-
coef(model_phylo_adrenal_glands)$sex %>%
as_tibble(rownames = "sex") %>%
select(sex, matches("Estimate|Q2.5|Q97.5")) %>%
transmute(
sex,
slope = Estimate.log10_body_size,
slope_low = Q2.5.log10_body_size,
slope_high = Q97.5.log10_body_size,
)
coef_sex# A tibble: 2 × 4
sex slope slope_low slope_high
<chr> <dbl> <dbl> <dbl>
1 female 0.588 0.358 0.778
2 male 0.610 0.389 0.791
Lets now visualise these estimates
# Create new data with sequences for each sex
new_data <- dat_organ %>%
group_by(sex) %>%
summarise(
min_body = min(log10_body_size),
max_body = max(log10_body_size),
.groups = "drop"
) %>%
rowwise() %>%
mutate(
log10_body_size = list(seq(min_body, max_body, length.out = 100))
) %>%
unnest(log10_body_size) %>%
select(sex, log10_body_size)
# Generate posterior predicted draws
draws_by_sex <- epred_draws(
model_phylo_adrenal_glands,
newdata = new_data,
re_formula = ~ (1 + log10_body_size | sex)
)# Prepare labels
label_pos <- dat_organ %>%
group_by(sex) %>%
summarise(
x_lab = 1.1,
y_lab = 0.5,
.groups = "drop"
)
# Build the label text
slopes_labels <- coef_sex %>%
left_join(label_pos, by = "sex") %>%
mutate(
label = sprintf("β = %.2f [%.2f, %.2f]", slope, slope_low, slope_high)
)
# Plot points and predicted lines with credible intervals
ggplot() +
geom_point(
data = dat_organ,
aes(x = log10_body_size, y = log10_organ_size),
shape = 21, colour = "black", fill = "grey",
alpha = 0.5, size = 3
) +
stat_lineribbon(
data = draws_by_sex,
aes(x = log10_body_size, y = .epred, fill = stat(.width)),
alpha = 0.5, colour = NA
) +
geom_text(
data = slopes_labels,
aes(x = 1.1, y = 0.5, label = label),
hjust = 0, vjust = 1, size = 4, font = 2
) +
scale_fill_distiller(palette = "Purples", direction = 1) +
labs(
x = expression(log[10]*" Body size"),
y = expression(log[10]*" Adrenal glands size"),
title = "Adrenal glands size ~ Body size + (1 + Body size | Sex) + (1 | Phylogeny)"
) +
facet_wrap(vars(sex), nrow = 1) +
theme_bw() +
theme(
plot.title = element_text(size = 14),
strip.text = element_text(size = 12),
axis.title.x = element_text(size = 14),
axis.title.y = element_text(size = 14),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12),
legend.position = "none"
)loo_simple <- loo(model_simple_adrenal_glands)
loo_phylo <- loo(model_phylo_adrenal_glands)
loo_compare(loo_simple, loo_phylo) elpd_diff se_diff
model_phylo_adrenal_glands 0.0 0.0
model_simple_adrenal_glands -15.2 4.2
In this comparison, the model that accounts for phylogeny is clearly preferable to the model that treats species as independent observational units. The phylogenetic model (model_phylo_adrenal glands) is used as the reference with elpd_diff = 0, whereas the simpler model (model_simple_adrenal glands) has an expected log predictive density (ELPD) that is about 15 units lower, with an associated standard error of 4.2 In the context of leave‑one‑out cross‑validation, differences exceeding roughly four times their standard error are typically regarded as strong evidence. Here, 15/4.2≈3.6, indicating both models are similarly good.
# Filter organ of interest
dat_organ <- dat %>%
group_by(organ_grouped, sex) %>%
filter(n_distinct(species) >= 5) %>%
ungroup() %>%
filter(organ_grouped == "kidneys")
# Check for database/tree species mismatches
setdiff(tree$tip.label, dat_organ$phylo) # Tree species absent in database [1] "Pteropus_scapulatus" "Pteropus_poliocephalus"
[3] "Pteropus_alecto" "Ficedula_hypoleuca"
[5] "Plectrophenax_nivalis" "Pica_pica"
[7] "Tringa_glareola" "Gavia_arctica"
[9] "Lagopus_muta" "Lagopus_lagopus"
[11] "Tetrao_urogallus" "Gallus_gallus"
[13] "Coturnix_japonica" "Numida_meleagris"
[15] "Aythya_fuligula" "Anas_acuta"
[17] "Melanitta_nigra" "Phrynocephalus_vlangalii"
[19] "Oreochromis_niloticus" "Poecilia_reticulata"
[21] "Anguilla_anguilla"
setdiff(dat_organ$phylo, tree$tip.label) # Database species absent in treecharacter(0)
# Exclude mismatched species for consistency
tree_organ <- keep.tip(tree, intersect(tree$tip.label, dat_organ$phylo))
dat_organ <- dat_organ[dat_organ$phylo %in% tree_organ$tip.label, ]
# Re-check for mismatches after pruning
setdiff(tree_organ$tip.label, dat_organ$phylo)character(0)
setdiff(dat_organ$phylo, tree_organ$tip.label)character(0)
# Calculate branch lengths using Grafen's method
tree_organ <- compute.brlen(tree_organ, method = "Grafen")
is.ultrametric(tree_organ)[1] TRUE
# Compute phylogenetic covariance matrix
A_organ <- ape::vcv.phylo(tree_organ)model_simple_kidneys <- brm(
log10_organ_size ~
1 +
log10_body_size +
(1 + log10_body_size | sex),
data = dat_organ,
family = gaussian(),
prior = c(
prior(normal(0.75, 0.5), class = "b", coef = "log10_body_size"),
prior(normal(0, 2), class = "Intercept"),
prior(student_t(3, 0, 0.5), class = "sd"),
prior(student_t(3, 0, 1), class = "sigma")
),
sample_prior = TRUE,
chains = 4,
cores = 4,
threads = threading(2),
iter = 4000,
warmup = 2000,
control = list(adapt_delta = 0.95),
save_pars = save_pars(all = TRUE),
silent = TRUE, refresh = 0
)Running MCMC with 4 parallel chains, with 2 thread(s) per chain...
Chain 1 finished in 3.3 seconds.
Chain 2 finished in 4.1 seconds.
Chain 3 finished in 5.0 seconds.
Chain 4 finished in 5.8 seconds.
All 4 chains finished successfully.
Mean chain execution time: 4.6 seconds.
Total execution time: 6.0 seconds.
pp_check(model_simple_kidneys)summary(model_simple_kidneys) Family: gaussian
Links: mu = identity; sigma = identity
Formula: log10_organ_size ~ 1 + log10_body_size + (1 + log10_body_size | sex)
Data: dat_organ (Number of observations: 42)
Draws: 4 chains, each with iter = 4000; warmup = 2000; thin = 1;
total post-warmup draws = 8000
Multilevel Hyperparameters:
~sex (Number of levels: 2)
Estimate Est.Error l-95% CI u-95% CI Rhat
sd(Intercept) 0.21 0.24 0.01 0.87 1.00
sd(log10_body_size) 0.11 0.14 0.00 0.48 1.00
cor(Intercept,log10_body_size) -0.08 0.62 -0.98 0.96 1.00
Bulk_ESS Tail_ESS
sd(Intercept) 2378 3413
sd(log10_body_size) 1784 2872
cor(Intercept,log10_body_size) 3224 3507
Regression Coefficients:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
Intercept -1.61 0.19 -2.02 -1.21 1.00 1995 1995
log10_body_size 0.78 0.09 0.57 0.97 1.00 1397 545
Further Distributional Parameters:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sigma 0.07 0.01 0.06 0.09 1.00 4538 2728
Draws were sampled using sample(hmc). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).
I will next extract, for each sex, the estimated slope and intercept, together with the 95% credible intervals.
# Extract combined coefficients (fixed + random)
coef_sex <-
coef(model_simple_kidneys)$sex %>%
as_tibble(rownames = "sex") %>%
select(sex, matches("Estimate|Q2.5|Q97.5")) %>%
transmute(
sex,
slope = Estimate.log10_body_size,
slope_low = Q2.5.log10_body_size,
slope_high = Q97.5.log10_body_size,
)
coef_sex# A tibble: 2 × 4
sex slope slope_low slope_high
<chr> <dbl> <dbl> <dbl>
1 female 0.791 0.759 0.826
2 male 0.784 0.751 0.816
Lets now visualise these estimates
# Create new data with sequences for each sex
new_data <- dat_organ %>%
group_by(sex) %>%
summarise(
min_body = min(log10_body_size),
max_body = max(log10_body_size),
.groups = "drop"
) %>%
rowwise() %>%
mutate(
log10_body_size = list(seq(min_body, max_body, length.out = 100))
) %>%
unnest(log10_body_size) %>%
select(sex, log10_body_size)
# Generate posterior predicted draws
draws_by_sex <- epred_draws(
model_simple_kidneys,
newdata = new_data,
re_formula = ~ (1 + log10_body_size | sex)
)# Prepare labels
label_pos <- dat_organ %>%
group_by(sex) %>%
summarise(
x_lab = 1.1,
y_lab = 2.2,
.groups = "drop"
)
# Build the label text
slopes_labels <- coef_sex %>%
left_join(label_pos, by = "sex") %>%
mutate(
label = sprintf("β = %.2f [%.2f, %.2f]", slope, slope_low, slope_high)
)
# Plot points and predicted lines with credible intervals
ggplot() +
geom_point(
data = dat_organ,
aes(x = log10_body_size, y = log10_organ_size),
shape = 21, colour = "black", fill = "grey",
alpha = 0.5, size = 3
) +
stat_lineribbon(
data = draws_by_sex,
aes(x = log10_body_size, y = .epred, fill = stat(.width)),
alpha = 0.5, colour = NA
) +
geom_text(
data = slopes_labels,
aes(x = 1.1, y = 2.2, label = label),
hjust = 0, vjust = 1, size = 4, font = 2
) +
scale_fill_distiller(palette = "Purples", direction = 1) +
labs(
x = expression(log[10]*" Body size"),
y = expression(log[10]*" Kidneys size"),
title = "Kidneys size ~ Body size + (1 + Body size | Sex)"
) +
facet_wrap(vars(sex), nrow = 1) +
theme_bw() +
theme(
plot.title = element_text(size = 14),
strip.text = element_text(size = 12),
axis.title.x = element_text(size = 14),
axis.title.y = element_text(size = 14),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12),
legend.position = "none"
)model_phylo_kidneys <- brm(
log10_organ_size ~
1 +
log10_body_size +
(1 + log10_body_size | sex) +
(1 | gr(phylo, cov = A_organ)),
data = dat_organ,
data2 = list(A_organ = A_organ),
family = gaussian(),
prior = c(
prior(normal(0.75, 0.5), class = "b", coef = "log10_body_size"),
prior(normal(0, 2), class = "Intercept"),
prior(student_t(3, 0, 0.5), class = "sd"),
prior(student_t(3, 0, 1), class = "sigma")
),
sample_prior = TRUE,
chains = 4,
cores = 4,
threads = threading(2),
iter = 4000,
warmup = 2000,
control = list(adapt_delta = 0.95),
save_pars = save_pars(all = TRUE),
silent = TRUE, refresh = 0
)Running MCMC with 4 parallel chains, with 2 thread(s) per chain...
Chain 3 finished in 6.8 seconds.
Chain 4 finished in 8.5 seconds.
Chain 1 finished in 9.2 seconds.
Chain 2 finished in 13.7 seconds.
All 4 chains finished successfully.
Mean chain execution time: 9.6 seconds.
Total execution time: 13.8 seconds.
pp_check(model_phylo_kidneys)summary(model_phylo_kidneys) Family: gaussian
Links: mu = identity; sigma = identity
Formula: log10_organ_size ~ 1 + log10_body_size + (1 + log10_body_size | sex) + (1 | gr(phylo, cov = A_organ))
Data: dat_organ (Number of observations: 42)
Draws: 4 chains, each with iter = 4000; warmup = 2000; thin = 1;
total post-warmup draws = 8000
Multilevel Hyperparameters:
~phylo (Number of levels: 8)
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sd(Intercept) 0.12 0.05 0.05 0.25 1.00 1417 3166
~sex (Number of levels: 2)
Estimate Est.Error l-95% CI u-95% CI Rhat
sd(Intercept) 0.21 0.26 0.01 0.90 1.00
sd(log10_body_size) 0.12 0.15 0.00 0.51 1.01
cor(Intercept,log10_body_size) -0.07 0.63 -0.97 0.97 1.00
Bulk_ESS Tail_ESS
sd(Intercept) 2019 2736
sd(log10_body_size) 1522 1982
cor(Intercept,log10_body_size) 4509 4286
Regression Coefficients:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
Intercept -1.63 0.23 -2.12 -1.16 1.00 2901 2795
log10_body_size 0.79 0.09 0.57 0.99 1.01 1488 522
Further Distributional Parameters:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sigma 0.05 0.01 0.04 0.07 1.00 3467 3532
Draws were sampled using sample(hmc). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).
hyp <- paste(
"sd_phylo__Intercept^2 /",
"(sd_phylo__Intercept^2 + sd_sex__Intercept^2 + sigma^2) = 0"
)
(hyp <- hypothesis(model_phylo_kidneys, hyp, class = NULL))Hypothesis Tests for class :
Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio
1 (sd_phylo__Interc... = 0 0.4 0.3 0.01 0.91 0.35
Post.Prob Star
1 0.26 *
---
'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
'*': For one-sided hypotheses, the posterior probability exceeds 95%;
for two-sided hypotheses, the value tested against lies outside the 95%-CI.
Posterior probabilities of point hypotheses assume equal prior probabilities.
lambda_kidneys <- hyp$hypothesis$EstimateI will next extract, for each sex, the estimated slope and intercept, together with the 95% credible intervals.
# Extract combined coefficients (fixed + random)
coef_sex <-
coef(model_phylo_kidneys)$sex %>%
as_tibble(rownames = "sex") %>%
select(sex, matches("Estimate|Q2.5|Q97.5")) %>%
transmute(
sex,
slope = Estimate.log10_body_size,
slope_low = Q2.5.log10_body_size,
slope_high = Q97.5.log10_body_size,
)
coef_sex# A tibble: 2 × 4
sex slope slope_low slope_high
<chr> <dbl> <dbl> <dbl>
1 female 0.803 0.732 0.877
2 male 0.795 0.725 0.865
Lets now visualise these estimates
# Create new data with sequences for each sex
new_data <- dat_organ %>%
group_by(sex) %>%
summarise(
min_body = min(log10_body_size),
max_body = max(log10_body_size),
.groups = "drop"
) %>%
rowwise() %>%
mutate(
log10_body_size = list(seq(min_body, max_body, length.out = 100))
) %>%
unnest(log10_body_size) %>%
select(sex, log10_body_size)
# Generate posterior predicted draws
draws_by_sex <- epred_draws(
model_phylo_kidneys,
newdata = new_data,
re_formula = ~ (1 + log10_body_size | sex)
)# Prepare labels
label_pos <- dat_organ %>%
group_by(sex) %>%
summarise(
x_lab = 1.1,
y_lab = 2.2,
.groups = "drop"
)
# Build the label text
slopes_labels <- coef_sex %>%
left_join(label_pos, by = "sex") %>%
mutate(
label = sprintf("β = %.2f [%.2f, %.2f]", slope, slope_low, slope_high)
)
# Plot points and predicted lines with credible intervals
ggplot() +
geom_point(
data = dat_organ,
aes(x = log10_body_size, y = log10_organ_size),
shape = 21, colour = "black", fill = "grey",
alpha = 0.5, size = 3
) +
stat_lineribbon(
data = draws_by_sex,
aes(x = log10_body_size, y = .epred, fill = stat(.width)),
alpha = 0.5, colour = NA
) +
geom_text(
data = slopes_labels,
aes(x = 1.1, y = 2.2, label = label),
hjust = 0, vjust = 1, size = 4, font = 2
) +
scale_fill_distiller(palette = "Purples", direction = 1) +
labs(
x = expression(log[10]*" Body size"),
y = expression(log[10]*" Kidneys size"),
title = "Kidneys size ~ Body size + (1 + Body size | Sex) + (1 | Phylogeny)"
) +
facet_wrap(vars(sex), nrow = 1) +
theme_bw() +
theme(
plot.title = element_text(size = 14),
strip.text = element_text(size = 12),
axis.title.x = element_text(size = 14),
axis.title.y = element_text(size = 14),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12),
legend.position = "none"
)loo_simple <- loo(model_simple_kidneys)
loo_phylo <- loo(model_phylo_kidneys)
loo_compare(loo_simple, loo_phylo) elpd_diff se_diff
model_phylo_kidneys 0.0 0.0
model_simple_kidneys -7.9 3.7
In this comparison, the model that accounts for phylogeny is clearly preferable to the model that treats species as independent observational units. The phylogenetic model (model_phylo_kidneys) is used as the reference with elpd_diff = 0, whereas the simpler model (model_simple_kidneys) has an expected log predictive density (ELPD) that is about 7.9 units lower, with an associated standard error of 3.7. In the context of leave‑one‑out cross‑validation, differences exceeding roughly four times their standard error are typically regarded as strong evidence. Here, 7.9/3.7≈2.1, indicating both models are similarly good.
The next task is to extract the coefficients from all 27 models in order to prepare a table (Table 2 in the paper) that presents the coefficients by sex, global coefficients. Additionally, I will include the Pagel’s \(\lambda\) representing the phylogenetic signal.
# list of models
models_list_simple <- list(
model_simple_liver,
model_simple_brain,
model_simple_heart,
model_simple_pancreas,
model_simple_pituitary_glands,
model_simple_spleen,
model_simple_thymus,
model_simple_stomach,
model_simple_small_intestine,
model_simple_large_intestine,
model_simple_lungs,
model_simple_adrenal_glands,
model_simple_kidneys,
model_simple_muscle
)
models_list_phylo <- list(
model_phylo_liver,
model_phylo_brain,
model_phylo_heart,
model_phylo_pancreas,
model_phylo_pituitary_glands,
model_phylo_spleen,
model_phylo_thymus,
model_phylo_stomach,
model_phylo_small_intestine,
model_phylo_large_intestine,
model_phylo_lungs,
model_phylo_adrenal_glands,
model_phylo_kidneys
)
# Organ names for simple. models
names(models_list_simple) <- c(
"liver","brain","heart","pancreas","pituitary_glands","spleen","thymus",
"stomach","small_intestine","large_intestine","lungs","adrenal_glands",
"kidneys","muscle"
)
# Organ names for phylogenetic models
names(models_list_phylo) <- c(
"liver","brain","heart","pancreas","pituitary_glands","spleen","thymus",
"stomach","small_intestine","large_intestine","lungs","adrenal_glands",
"kidneys"
)
# Hypothesis string for lambda (phylogenetic signal), based on the vignette
# available here:
# https://cran.r-project.org/web/packages/brms/vignettes/brms_phylogenetics.html
hyp_lambda <- paste0(
"sd_phylo__Intercept^2 /",
"(sd_phylo__Intercept^2 + sd_sex__Intercept^2 + sigma^2) = 0"
)
# Make a function to do the process less repetitive. It will extract from each
# model sex-specific coefficients + global slope + lambda (for phylo models only)
extract_all_coefs <- function(fit, organ, model_type) {
# Sex-specific coefficients (random effects)
coef_sex <- coef(fit)$sex %>%
as_tibble(rownames = "sex") %>%
select(sex, matches("Estimate|Q2.5|Q97.5")) %>%
transmute(
model_type,
organ,
sex,
intercept = Estimate.Intercept,
intercept_low = Q2.5.Intercept,
intercept_high = Q97.5.Intercept,
slope = Estimate.log10_body_size,
slope_low = Q2.5.log10_body_size,
slope_high = Q97.5.log10_body_size
)
# Global slope (fixed effect)
global_slope_vec <- fixef(fit)["log10_body_size", ]
global_slope <- tibble(
global_slope = global_slope_vec["Estimate"],
global_slope_low = global_slope_vec["Q2.5"],
global_slope_high = global_slope_vec["Q97.5"]
)
# Lambda (phylogenetic signal) - only for phylo models
if(model_type == "phylo") {
hyp_lambda <- hypothesis(fit, hyp_lambda, class = NULL)
lambda_vals <- hyp_lambda$hypothesis %>%
transmute(
lambda = Estimate
)
} else {
lambda_vals <- tibble(lambda = NA_real_)
}
# Combine everything
coef_sex %>%
bind_cols(global_slope, lambda_vals)
}
# Create complete data frames
df_simple_all_organs <- imap_dfr(
models_list_simple,
~ extract_all_coefs(.x, organ = .y, model_type = "simple")
)
df_phylo_all_organs <- imap_dfr(
models_list_phylo,
~ extract_all_coefs(.x, organ = .y, model_type = "phylo")
)
# Final data frame with everything
df_all_organs <- bind_rows(df_simple_all_organs, df_phylo_all_organs)
# add R² (in case is needed by reviewers)
r2_simple <- imap(models_list_simple, ~ bayes_R2(.x)["R2", "Estimate"])
r2_phylo <- imap(models_list_phylo, ~ bayes_R2(.x)["R2", "Estimate"])
df_all_organs <- df_all_organs %>%
arrange(model_type,organ) %>%
mutate(R2 = case_when(
model_type == "simple" ~ r2_simple[organ],
model_type == "phylo" ~ r2_phylo[organ]
))df_all_organs_ci <- df_all_organs %>%
mutate(
intercept_ci = sprintf("%.2f [%.2f, %.2f]",
intercept, intercept_low, intercept_high),
slope_ci = sprintf("%.2f [%.2f, %.2f]",
slope, slope_low, slope_high),
global_slope_ci = sprintf("%.2f [%.2f, %.2f]",
global_slope, global_slope_low, global_slope_high)
) %>%
select(
model_type,
organ,
sex,
intercept_ci,
slope_ci,
global_slope_ci,
lambda,
R2
)
# Table with vertical separators and fixed column widths
df_all_organs_ci %>%
kable(
align = c("l", "l", "l", "l", "l", "l", "l", "l"),
full_width = FALSE,
table.attr = 'width="100%"'
) %>%
kable_styling() %>%
column_spec(1, width = "10%") %>% # model_type
column_spec(2, width = "15%", border_left = TRUE) %>% # organ
column_spec(3, width = "7%", border_left = TRUE) %>% # sex
column_spec(4, width = "17%", border_left = TRUE) %>% # intercept_ci
column_spec(5, width = "17%", border_left = TRUE) %>% # slope_ci
column_spec(6, width = "17%", border_left = TRUE) %>% # global_slope_ci
column_spec(7, width = "7%", border_left = TRUE) %>% # lambda
column_spec(8, width = "7%", border_left = TRUE) # R2| model_type | organ | sex | intercept_ci | slope_ci | global_slope_ci | lambda | R2 |
|---|---|---|---|---|---|---|---|
| phylo | adrenal_glands | female | -2.55 [-3.22, -1.71] | 0.59 [0.36, 0.78] | 0.62 [0.31, 0.95] | 0.63 | 0.99 |
| phylo | adrenal_glands | male | -2.69 [-3.36, -1.85] | 0.61 [0.39, 0.79] | 0.62 [0.31, 0.95] | 0.63 | 0.99 |
| phylo | brain | female | 0.32 [-0.82, 1.51] | 0.10 [-0.00, 0.23] | 0.12 [-0.07, 0.38] | 0.93 | 1 |
| phylo | brain | male | 0.34 [-0.80, 1.51] | 0.10 [0.01, 0.23] | 0.12 [-0.07, 0.38] | 0.93 | 1 |
| phylo | heart | female | -2.31 [-3.06, -1.43] | 0.80 [0.57, 1.02] | 0.79 [0.31, 1.10] | 0.87 | 1 |
| phylo | heart | male | -2.32 [-3.06, -1.45] | 0.82 [0.61, 1.03] | 0.79 [0.31, 1.10] | 0.87 | 1 |
| phylo | kidneys | female | -1.64 [-1.89, -1.40] | 0.80 [0.73, 0.88] | 0.79 [0.57, 0.99] | 0.40 | 0.99 |
| phylo | kidneys | male | -1.60 [-1.85, -1.35] | 0.79 [0.73, 0.87] | 0.79 [0.57, 0.99] | 0.40 | 0.99 |
| phylo | large_intestine | female | -1.77 [-2.68, -0.87] | 0.67 [0.31, 1.03] | 0.68 [0.25, 1.14] | 0.25 | 0.87 |
| phylo | large_intestine | male | -1.77 [-2.68, -0.86] | 0.66 [0.30, 1.03] | 0.68 [0.25, 1.14] | 0.25 | 0.87 |
| phylo | liver | female | -1.39 [-1.61, -1.17] | 0.88 [0.82, 0.94] | 0.86 [0.57, 1.07] | 0.47 | 0.99 |
| phylo | liver | male | -1.39 [-1.62, -1.18] | 0.88 [0.82, 0.93] | 0.86 [0.57, 1.07] | 0.47 | 0.99 |
| phylo | lungs | female | -1.90 [-2.63, -0.84] | 0.75 [0.35, 1.05] | 0.72 [0.32, 1.09] | 0.72 | 0.99 |
| phylo | lungs | male | -1.87 [-2.58, -0.82] | 0.74 [0.36, 1.01] | 0.72 [0.32, 1.09] | 0.72 | 0.99 |
| phylo | pancreas | female | -1.72 [-2.22, -0.99] | 0.69 [0.45, 0.85] | 0.70 [0.39, 0.95] | 0.43 | 0.99 |
| phylo | pancreas | male | -1.72 [-2.21, -1.00] | 0.70 [0.46, 0.84] | 0.70 [0.39, 0.95] | 0.43 | 0.99 |
| phylo | pituitary_glands | female | -3.10 [-4.19, -1.99] | 0.52 [0.16, 0.88] | 0.56 [0.15, 1.03] | 0.60 | 0.96 |
| phylo | pituitary_glands | male | -3.12 [-4.18, -2.03] | 0.52 [0.17, 0.86] | 0.56 [0.15, 1.03] | 0.60 | 0.96 |
| phylo | small_intestine | female | -1.14 [-2.85, 0.71] | 0.74 [0.02, 1.45] | 0.74 [0.04, 1.41] | 0.80 | 0.99 |
| phylo | small_intestine | male | -1.20 [-2.92, 0.66] | 0.77 [0.05, 1.49] | 0.74 [0.04, 1.41] | 0.80 | 0.99 |
| phylo | spleen | female | -2.89 [-3.74, -1.96] | 0.96 [0.72, 1.18] | 0.95 [0.57, 1.28] | 0.75 | 0.97 |
| phylo | spleen | male | -2.93 [-3.79, -1.99] | 0.96 [0.72, 1.16] | 0.95 [0.57, 1.28] | 0.75 | 0.97 |
| phylo | stomach | female | -2.26 [-3.07, -1.27] | 1.01 [0.63, 1.31] | 0.98 [0.52, 1.36] | 0.36 | 0.89 |
| phylo | stomach | male | -2.26 [-3.07, -1.27] | 1.00 [0.62, 1.31] | 0.98 [0.52, 1.36] | 0.36 | 0.89 |
| phylo | thymus | female | -1.31 [-2.71, 0.41] | 0.41 [-0.13, 0.86] | 0.46 [-0.09, 0.96] | 0.40 | 0.83 |
| phylo | thymus | male | -1.31 [-2.76, 0.40] | 0.44 [-0.07, 0.88] | 0.46 [-0.09, 0.96] | 0.40 | 0.83 |
| simple | adrenal_glands | female | -3.07 [-3.32, -2.81] | 0.79 [0.69, 0.89] | 0.79 [0.45, 1.12] | 0.94 | |
| simple | adrenal_glands | male | -3.17 [-3.44, -2.91] | 0.79 [0.69, 0.90] | 0.79 [0.45, 1.12] | 0.94 | |
| simple | brain | female | -1.56 [-2.03, -1.09] | 0.83 [0.65, 1.00] | 0.81 [0.48, 1.10] | 0.72 | |
| simple | brain | male | -1.56 [-2.03, -1.09] | 0.81 [0.63, 0.97] | 0.81 [0.48, 1.10] | 0.72 | |
| simple | heart | female | -2.98 [-3.14, -2.83] | 1.25 [1.18, 1.32] | 1.22 [0.77, 1.45] | 0.97 | |
| simple | heart | male | -2.94 [-3.09, -2.79] | 1.24 [1.18, 1.31] | 1.22 [0.77, 1.45] | 0.97 | |
| simple | kidneys | female | -1.62 [-1.70, -1.54] | 0.79 [0.76, 0.83] | 0.78 [0.57, 0.97] | 0.99 | |
| simple | kidneys | male | -1.58 [-1.66, -1.50] | 0.78 [0.75, 0.82] | 0.78 [0.57, 0.97] | 0.99 | |
| simple | large_intestine | female | -1.80 [-2.16, -1.44] | 0.67 [0.50, 0.86] | 0.68 [0.35, 1.03] | 0.87 | |
| simple | large_intestine | male | -1.79 [-2.14, -1.41] | 0.66 [0.48, 0.84] | 0.68 [0.35, 1.03] | 0.87 | |
| simple | liver | female | -1.32 [-1.43, -1.20] | 0.89 [0.84, 0.94] | 0.88 [0.59, 1.12] | 0.96 | |
| simple | liver | male | -1.34 [-1.46, -1.22] | 0.90 [0.85, 0.95] | 0.88 [0.59, 1.12] | 0.96 | |
| simple | lungs | female | -2.56 [-2.86, -2.27] | 1.11 [0.97, 1.25] | 1.08 [0.72, 1.35] | 0.91 | |
| simple | lungs | male | -2.54 [-2.83, -2.24] | 1.09 [0.96, 1.22] | 1.08 [0.72, 1.35] | 0.91 | |
| simple | muscle | female | 0.53 [-0.50, 1.42] | 0.60 [0.30, 0.94] | 0.55 [0.18, 0.91] | 0.91 | |
| simple | muscle | male | 0.80 [-0.07, 1.70] | 0.52 [0.23, 0.80] | 0.55 [0.18, 0.91] | 0.91 | |
| simple | pancreas | female | -1.83 [-2.10, -1.56] | 0.73 [0.65, 0.82] | 0.73 [0.48, 0.96] | 0.98 | |
| simple | pancreas | male | -1.83 [-2.11, -1.57] | 0.74 [0.66, 0.82] | 0.73 [0.48, 0.96] | 0.98 | |
| simple | pituitary_glands | female | -3.50 [-4.27, -2.71] | 0.66 [0.39, 0.92] | 0.67 [0.34, 1.05] | 0.72 | |
| simple | pituitary_glands | male | -3.52 [-4.30, -2.72] | 0.65 [0.40, 0.91] | 0.67 [0.34, 1.05] | 0.72 | |
| simple | small_intestine | female | -1.41 [-2.32, -0.46] | 0.89 [0.43, 1.33] | 0.87 [0.36, 1.37] | 0.68 | |
| simple | small_intestine | male | -1.44 [-2.35, -0.46] | 0.90 [0.44, 1.36] | 0.87 [0.36, 1.37] | 0.68 | |
| simple | spleen | female | -3.09 [-3.35, -2.83] | 1.10 [0.99, 1.22] | 1.08 [0.78, 1.34] | 0.9 | |
| simple | spleen | male | -3.13 [-3.41, -2.87] | 1.10 [0.99, 1.21] | 1.08 [0.78, 1.34] | 0.9 | |
| simple | stomach | female | -2.56 [-3.01, -2.12] | 1.15 [0.98, 1.33] | 1.12 [0.72, 1.43] | 0.86 | |
| simple | stomach | male | -2.55 [-3.00, -2.09] | 1.15 [0.96, 1.32] | 1.12 [0.72, 1.43] | 0.86 | |
| simple | thymus | female | -1.82 [-2.48, -1.19] | 0.58 [0.37, 0.80] | 0.60 [0.29, 0.93] | 0.79 | |
| simple | thymus | male | -1.83 [-2.51, -1.15] | 0.60 [0.39, 0.82] | 0.60 [0.29, 0.93] | 0.79 |
# Set manual colors by organ
all_organs <- unique(df_all_organs$organ)
n_organs <- length(all_organs)
all_organs_sorted <- sort(all_organs)
my_cols <- c(
adrenal_glands = "#D2AA06",
brain = "#A9A9A9",
heart = "#DC143C",
kidneys = "#D1600F",
large_intestine = "#1B9E77",
liver = "#8B0000",
lungs = "#FFFF00",
muscle = "#4169E1",
pancreas = "#FF69B4",
pituitary_glands = "#D2691E",
small_intestine = "#8DA715",
spleen = "#B22222",
stomach = "#DDA0DD",
thymus = "black"
)
# PLOT 1: Slopes derived from the phylogenetic model
df1 <- df_all_organs %>%
filter(model_type == "phylo", sex %in% c("male", "female")) %>%
select(organ, sex, slope, slope_low, slope_high) %>%
pivot_wider(names_from = sex, values_from = c(slope, slope_low, slope_high), names_sep = "_") %>%
mutate(male_xmin = slope_low_male, male_xmax = slope_high_male,
female_ymin = slope_low_female, female_ymax = slope_high_female)
p1 <- ggplot(df1, aes(x = slope_male, y = slope_female, colour = organ)) +
geom_errorbarh(aes(xmin = male_xmin, xmax = male_xmax), height = 0.015, size = 0.6) +
geom_errorbar(aes(ymin = female_ymin, ymax = female_ymax), width = 0.015, size = 0.6) +
geom_point(size = 2) +
geom_abline(intercept = 0, slope = 1, linetype = "dashed", colour = "black", size = 0.8) +
scale_colour_manual(values = my_cols) +
labs(x = "Male slope", y = "Female slope") +
coord_fixed() +
theme_bw() +
theme(
legend.position = "none",
plot.margin = margin(15, 15, 15, 15),
axis.text = element_text(size = 12),
axis.title.x = element_text(margin = margin(t = 12)),
axis.title.y = element_text(margin = margin(r = 5)),
axis.title = element_text(size = 12)
) +
scale_x_continuous(limits = c(-0.25, 1.5)) +
scale_y_continuous(limits = c(-0.25, 1.5)) +
theme(panel.border = element_rect(colour = "black", fill = NA, size = 1))
# PLOT 2: Slopes derived from the simple model
df2 <- df_all_organs %>%
filter(model_type == "simple", sex %in% c("male", "female")) %>%
select(organ, sex, slope, slope_low, slope_high) %>%
pivot_wider(names_from = sex, values_from = c(slope, slope_low, slope_high), names_sep = "_") %>%
mutate(male_xmin = slope_low_male, male_xmax = slope_high_male,
female_ymin = slope_low_female, female_ymax = slope_high_female)
p2 <- ggplot(df2, aes(x = slope_male, y = slope_female, colour = organ)) +
geom_errorbarh(aes(xmin = male_xmin, xmax = male_xmax), height = 0.015, size = 0.6) +
geom_errorbar(aes(ymin = female_ymin, ymax = female_ymax), width = 0.015, size = 0.6) +
geom_point(size = 2) +
geom_abline(intercept = 0, slope = 1, linetype = "dashed", colour = "black", size = 0.8) +
scale_colour_manual(values = my_cols) + labs(x = "Male slope", y = "Female slope") +
coord_fixed() +
theme_bw() +
theme(
legend.position = "none",
plot.margin = margin(15, 15, 15, 15),
axis.text = element_text(size = 12),
axis.title.x = element_text(margin = margin(t = 12)),
axis.title.y = element_text(margin = margin(r = 5)),
axis.title = element_text(size = 12)
) +
scale_x_continuous(limits = c(-0.25, 1.5)) +
scale_y_continuous(limits = c(-0.25, 1.5)) +
theme(panel.border = element_rect(colour = "black", fill = NA, size = 1))
# PLOT 3: Intercepts derived from the phylogenetic model
df3 <- df_all_organs %>%
filter(model_type == "phylo", sex %in% c("male", "female")) %>%
select(organ, sex, intercept, intercept_low, intercept_high) %>%
pivot_wider(names_from = sex, values_from = c(intercept, intercept_low, intercept_high), names_sep = "_") %>%
mutate(male_xmin = intercept_low_male, male_xmax = intercept_high_male,
female_ymin = intercept_low_female, female_ymax = intercept_high_female)
p3 <- ggplot(df3, aes(x = intercept_male, y = intercept_female, colour = organ)) +
geom_errorbarh(aes(xmin = male_xmin, xmax = male_xmax), height = 0.015, size = 0.6) +
geom_errorbar(aes(ymin = female_ymin, ymax = female_ymax), width = 0.015, size = 0.6) +
geom_point(size = 2) +
geom_abline(intercept = 0, slope = 1, linetype = "dashed", colour = "black", size = 0.8) +
scale_colour_manual(values = my_cols) +
labs(x = "Male intercept", y = "Female intercept") +
coord_fixed() +
theme_bw() +
theme(
legend.position = "none",
plot.margin = margin(15, 15, 15, 15),
axis.text = element_text(size = 12),
axis.title.x = element_text(margin = margin(t = 12)),
axis.title.y = element_text(margin = margin(r = 5)),
axis.title = element_text(size = 12)
) +
scale_x_continuous(limits = c(-5, 2)) +
scale_y_continuous(limits = c(-5, 2)) +
theme(panel.border = element_rect(colour = "black", fill = NA, size = 1))
# PLOT 4: Intercepts derived from the simple model
df4 <- df_all_organs %>%
filter(model_type == "simple", sex %in% c("male", "female")) %>%
select(organ, sex, intercept, intercept_low, intercept_high) %>%
pivot_wider(names_from = sex, values_from = c(intercept, intercept_low, intercept_high), names_sep = "_") %>%
mutate(male_xmin = intercept_low_male, male_xmax = intercept_high_male,
female_ymin = intercept_low_female, female_ymax = intercept_high_female)
p4 <- ggplot(df4, aes(x = intercept_male, y = intercept_female, colour = organ)) +
geom_errorbarh(aes(xmin = male_xmin, xmax = male_xmax), height = 0.015, size = 0.6) +
geom_errorbar(aes(ymin = female_ymin, ymax = female_ymax), width = 0.015, size = 0.6) +
geom_point(size = 2) +
geom_abline(intercept = 0, slope = 1, linetype = "dashed", colour = "black", size = 0.8) +
scale_colour_manual(values = my_cols) +
labs(x = "Male intercept", y = "Female intercept") +
coord_fixed() +
theme_bw() +
theme(
legend.position = "none",
plot.margin = margin(15, 15, 15, 15),
axis.text = element_text(size = 12),
axis.title.x = element_text(margin = margin(t = 12)),
axis.title.y = element_text(margin = margin(r = 5)),
axis.title = element_text(size = 12)
) +
scale_x_continuous(limits = c(-5, 2)) +
scale_y_continuous(limits = c(-5, 2)) +
theme(panel.border = element_rect(colour = "black", fill = NA, size = 1))
# Combining Panels
Figure_3 <- plot_grid(
p1,
p3,
p2,
p4,
labels = c("A", "B", "C", "D"),
nrow = 2,
ncol = 2,
label_size = 15,
align = "v")
Figure_3# Lets make one plot with legend
legend_plot <- ggplot(df2, aes(x = slope_male, y = slope_female, colour = organ)) +
geom_point(size = 3) +
scale_colour_manual(values = my_cols) +
theme_bw() +
theme(
legend.position = "top",
legend.title = element_blank(),
legend.text = element_text(size = 10)
) +
guides(
colour = guide_legend(
nrow = 2,
ncol = 7,
byrow = TRUE
)
)
# Extract legend and combine with Figure 2
shared_legend <- get_legend(legend_plot)
Figure_3 <- plot_grid(
shared_legend,
Figure_3,
ncol = 1,
rel_heights = c(0.15, 1)
)
# Lets now export the figure
ggsave("../outputs/Figure_3.png", Figure_3, width = 8, height = 8, dpi = 1200)
ggsave("../outputs/Figure_3.pdf", Figure_3, width = 8, height = 8)R version 4.3.2 (2023-10-31)
Platform: aarch64-apple-darwin20 (64-bit)
attached base packages: stats, graphics, grDevices, utils, datasets, methods and base
other attached packages: rstan(v.2.32.7), StanHeaders(v.2.32.10), RColorBrewer(v.1.1-3), modelr(v.0.1.11), broom(v.1.0.8), viridis(v.0.6.5), viridisLite(v.0.4.2), posterior(v.1.6.1), lubridate(v.1.9.4), forcats(v.1.0.0), readr(v.2.1.5), tidyverse(v.2.0.0), furrr(v.0.3.1), future(v.1.40.0), cmdstanr(v.0.9.0), purrr(v.1.0.4), loo(v.2.8.0), bayestestR(v.0.16.1), tidybayes(v.3.0.7), bayesplot(v.1.13.0), dplyr(v.1.1.4), brms(v.2.22.0), Rcpp(v.1.1.0), tidyr(v.1.3.1), stringr(v.1.5.1), details(v.0.4.0), ggthemes(v.5.1.0), tibble(v.3.2.1), ggtree(v.3.10.1), cowplot(v.1.1.3), ggpubr(v.0.6.0), ggplot2(v.3.5.2), kableExtra(v.1.4.0), readxl(v.1.4.5), phytools(v.2.4-4), maps(v.3.4.3), ape(v.5.8-1) and knitr(v.1.50)
loaded via a namespace (and not attached): tensorA(v.0.36.2.1), rstudioapi(v.0.17.1), jsonlite(v.2.0.0), magrittr(v.2.0.3), estimability(v.1.5.1), farver(v.2.1.2), rmarkdown(v.2.29), ragg(v.1.4.0), fs(v.1.6.6), vctrs(v.0.6.5), rstatix(v.0.7.2), htmltools(v.0.5.8.1), curl(v.6.2.3), distributional(v.0.5.0), DEoptim(v.2.2-8), cellranger(v.1.1.0), Formula(v.1.2-5), gridGraphics(v.0.5-1), parallelly(v.1.45.0), htmlwidgets(v.1.6.4), desc(v.1.4.3), plyr(v.1.8.9), emmeans(v.1.11.1), igraph(v.2.1.4), lifecycle(v.1.0.4), iterators(v.1.0.14), pkgconfig(v.2.0.3), Matrix(v.1.6-1.1), R6(v.2.6.1), fastmap(v.1.2.0), digest(v.0.6.37), numDeriv(v.2016.8-1.1), aplot(v.0.2.5), colorspace(v.2.1-1), patchwork(v.1.3.0), ps(v.1.9.1), textshaping(v.1.0.1), labeling(v.0.4.3), clusterGeneration(v.1.3.8), timechange(v.0.3.0), httr(v.1.4.7), abind(v.1.4-8), compiler(v.4.3.2), pander(v.0.6.6), withr(v.3.0.2), doParallel(v.1.0.17), inline(v.0.3.21), backports(v.1.5.0), optimParallel(v.1.0-2), carData(v.3.0-5), QuickJSR(v.1.7.0), pkgbuild(v.1.4.8), ggsignif(v.0.6.4), MASS(v.7.3-60), scatterplot3d(v.0.3-44), tools(v.4.3.2), clipr(v.0.8.0), glue(v.1.8.0), quadprog(v.1.5-8), nlme(v.3.1-168), grid(v.4.3.2), checkmate(v.2.3.2), reshape2(v.1.4.4), generics(v.0.1.4), gtable(v.0.3.6), tzdb(v.0.5.0), data.table(v.1.17.4), hms(v.1.1.3), utf8(v.1.2.5), xml2(v.1.3.8), car(v.3.1-3), foreach(v.1.5.2), pillar(v.1.10.2), ggdist(v.3.3.3), yulab.utils(v.0.2.0), treeio(v.1.26.0), lattice(v.0.22-7), tidyselect(v.1.2.1), gridExtra(v.2.3), arrayhelpers(v.1.1-0), V8(v.6.0.4), svglite(v.2.2.1), stats4(v.4.3.2), xfun(v.0.52), expm(v.1.0-0), bridgesampling(v.1.1-2), matrixStats(v.1.5.0), stringi(v.1.8.7), lazyeval(v.0.2.2), ggfun(v.0.1.8), yaml(v.2.3.10), evaluate(v.1.0.4), codetools(v.0.2-20), ggplotify(v.0.1.2), cli(v.3.6.5), RcppParallel(v.5.1.10), xtable(v.1.8-4), systemfonts(v.1.2.3), processx(v.3.8.6), globals(v.0.17.0), coda(v.0.19-4.1), png(v.0.1-8), svUnit(v.1.0.6), parallel(v.4.3.2), rstantools(v.2.4.0), Brobdingnag(v.1.2-9), listenv(v.0.9.1), phangorn(v.2.12.1), mvtnorm(v.1.3-3), tidytree(v.0.4.6), scales(v.1.4.0), insight(v.1.3.1), combinat(v.0.0-8), rlang(v.1.1.6), fastmatch(v.1.1-6) and mnormt(v.2.1.1)